rf_netbsdkintf.c revision 1.33 1 /* $NetBSD: rf_netbsdkintf.c,v 1.33 1999/12/03 03:06:44 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
/*
 * Driver-wide debug chattiness knob: dbN_printf() fires only when
 * rf_kdebug_level >= N (db0_printf always prints).  Patchable at
 * run time from a debugger.
 */
int     rf_kdebug_level = 0;

#ifdef DEBUG
#define db0_printf(a) printf a
#define db_printf(a) if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else				/* DEBUG */
/* Non-debug kernels keep db0_printf but compile the rest away.
 * NOTE(review): db_printf has no non-DEBUG definition -- confirm no
 * caller uses it outside #ifdef DEBUG. */
#define db0_printf(a) printf a
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif				/* DEBUG */
172
173 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
174
175 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
176
177 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
178 * spare table */
179 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
180 * installation process */
181
182 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
183 * reconstruction
184 * requests */
185
186
187 decl_simple_lock_data(, recon_queue_mutex)
188 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
189 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
190
191 /* prototypes */
192 static void KernelWakeupFunc(struct buf * bp);
193 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
194 dev_t dev, RF_SectorNum_t startSect,
195 RF_SectorCount_t numSect, caddr_t buf,
196 void (*cbFunc) (struct buf *), void *cbArg,
197 int logBytesPerSector, struct proc * b_proc);
198
199 #define Dprintf0(s) if (rf_queueDebug) \
200 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
201 #define Dprintf1(s,a) if (rf_queueDebug) \
202 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
203 #define Dprintf2(s,a,b) if (rf_queueDebug) \
204 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
205 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
206 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
207
208 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
209 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
210
211 void raidattach __P((int));
212 int raidsize __P((dev_t));
213
214 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
215 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
216 static int raidinit __P((dev_t, RF_Raid_t *, int));
217
218 int raidopen __P((dev_t, int, int, struct proc *));
219 int raidclose __P((dev_t, int, int, struct proc *));
220 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
221 int raidwrite __P((dev_t, struct uio *, int));
222 int raidread __P((dev_t, struct uio *, int));
223 void raidstrategy __P((struct buf *));
224 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
225
226 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
227 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
228 void rf_update_component_labels( RF_Raid_t *);
229 /*
230 * Pilfered from ccd.c
231 */
232
233 struct raidbuf {
234 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
235 struct buf *rf_obp; /* ptr. to original I/O buf */
236 int rf_flags; /* misc. flags */
237 RF_DiskQueueData_t *req;/* the request that this was part of.. */
238 };
239
240
241 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
242 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
243
244 /* XXX Not sure if the following should be replacing the raidPtrs above,
245 or if it should be used in conjunction with that... */
246
247 struct raid_softc {
248 int sc_flags; /* flags */
249 int sc_cflags; /* configuration flags */
250 size_t sc_size; /* size of the raid device */
251 dev_t sc_dev; /* our device.. */
252 char sc_xname[20]; /* XXX external name */
253 struct disk sc_dkdev; /* generic disk device info */
254 struct pool sc_cbufpool; /* component buffer pool */
255 };
256 /* sc_flags */
257 #define RAIDF_INITED 0x01 /* unit has been initialized */
258 #define RAIDF_WLABEL 0x02 /* label area is writable */
259 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
260 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
261 #define RAIDF_LOCKED 0x80 /* unit is locked */
262
263 #define raidunit(x) DISKUNIT(x)
264 static int numraid = 0;
265
266 /*
267 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
268 * Be aware that large numbers can allow the driver to consume a lot of
269 * kernel memory, especially on writes, and in degraded mode reads.
270 *
271 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
272 * a single 64K write will typically require 64K for the old data,
273 * 64K for the old parity, and 64K for the new parity, for a total
274 * of 192K (if the parity buffer is not re-used immediately).
275 * Even it if is used immedately, that's still 128K, which when multiplied
276 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
277 *
278 * Now in degraded mode, for example, a 64K read on the above setup may
279 * require data reconstruction, which will require *all* of the 4 remaining
280 * disks to participate -- 4 * 32K/disk == 128K again.
281 */
282
283 #ifndef RAIDOUTSTANDING
284 #define RAIDOUTSTANDING 6
285 #endif
286
287 #define RAIDLABELDEV(dev) \
288 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
289
290 /* declared here, and made public, for the benefit of KVM stuff.. */
291 struct raid_softc *raid_softc;
292
293 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
294 struct disklabel *));
295 static void raidgetdisklabel __P((dev_t));
296 static void raidmakedisklabel __P((struct raid_softc *));
297
298 static int raidlock __P((struct raid_softc *));
299 static void raidunlock __P((struct raid_softc *));
300 int raidlookup __P((char *, struct proc * p, struct vnode **));
301
302 static void rf_markalldirty __P((RF_Raid_t *));
303
304 void
305 raidattach(num)
306 int num;
307 {
308 int raidID;
309 int i, rc;
310
311 #ifdef DEBUG
312 printf("raidattach: Asked for %d units\n", num);
313 #endif
314
315 if (num <= 0) {
316 #ifdef DIAGNOSTIC
317 panic("raidattach: count <= 0");
318 #endif
319 return;
320 }
321 /* This is where all the initialization stuff gets done. */
322
323 /* Make some space for requested number of units... */
324
325 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
326 if (raidPtrs == NULL) {
327 panic("raidPtrs is NULL!!\n");
328 }
329
330 rc = rf_mutex_init(&rf_sparet_wait_mutex);
331 if (rc) {
332 RF_PANIC();
333 }
334
335 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
336 recon_queue = NULL;
337
338 for (i = 0; i < numraid; i++)
339 raidPtrs[i] = NULL;
340 rc = rf_BootRaidframe();
341 if (rc == 0)
342 printf("Kernelized RAIDframe activated\n");
343 else
344 panic("Serious error booting RAID!!\n");
345
346 /* put together some datastructures like the CCD device does.. This
347 * lets us lock the device and what-not when it gets opened. */
348
349 raid_softc = (struct raid_softc *)
350 malloc(num * sizeof(struct raid_softc),
351 M_RAIDFRAME, M_NOWAIT);
352 if (raid_softc == NULL) {
353 printf("WARNING: no memory for RAIDframe driver\n");
354 return;
355 }
356 numraid = num;
357 bzero(raid_softc, num * sizeof(struct raid_softc));
358
359 for (raidID = 0; raidID < num; raidID++) {
360 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
361 (RF_Raid_t *));
362 if (raidPtrs[raidID] == NULL) {
363 printf("raidPtrs[%d] is NULL\n", raidID);
364 }
365 }
366 }
367
368
369 int
370 raidsize(dev)
371 dev_t dev;
372 {
373 struct raid_softc *rs;
374 struct disklabel *lp;
375 int part, unit, omask, size;
376
377 unit = raidunit(dev);
378 if (unit >= numraid)
379 return (-1);
380 rs = &raid_softc[unit];
381
382 if ((rs->sc_flags & RAIDF_INITED) == 0)
383 return (-1);
384
385 part = DISKPART(dev);
386 omask = rs->sc_dkdev.dk_openmask & (1 << part);
387 lp = rs->sc_dkdev.dk_label;
388
389 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
390 return (-1);
391
392 if (lp->d_partitions[part].p_fstype != FS_SWAP)
393 size = -1;
394 else
395 size = lp->d_partitions[part].p_size *
396 (lp->d_secsize / DEV_BSIZE);
397
398 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
399 return (-1);
400
401 return (size);
402
403 }
404
405 int
406 raiddump(dev, blkno, va, size)
407 dev_t dev;
408 daddr_t blkno;
409 caddr_t va;
410 size_t size;
411 {
412 /* Not implemented. */
413 return ENXIO;
414 }
415 /* ARGSUSED */
416 int
417 raidopen(dev, flags, fmt, p)
418 dev_t dev;
419 int flags, fmt;
420 struct proc *p;
421 {
422 int unit = raidunit(dev);
423 struct raid_softc *rs;
424 struct disklabel *lp;
425 int part, pmask;
426 int error = 0;
427
428 if (unit >= numraid)
429 return (ENXIO);
430 rs = &raid_softc[unit];
431
432 if ((error = raidlock(rs)) != 0)
433 return (error);
434 lp = rs->sc_dkdev.dk_label;
435
436 part = DISKPART(dev);
437 pmask = (1 << part);
438
439 db1_printf(("Opening raid device number: %d partition: %d\n",
440 unit, part));
441
442
443 if ((rs->sc_flags & RAIDF_INITED) &&
444 (rs->sc_dkdev.dk_openmask == 0))
445 raidgetdisklabel(dev);
446
447 /* make sure that this partition exists */
448
449 if (part != RAW_PART) {
450 db1_printf(("Not a raw partition..\n"));
451 if (((rs->sc_flags & RAIDF_INITED) == 0) ||
452 ((part >= lp->d_npartitions) ||
453 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
454 error = ENXIO;
455 raidunlock(rs);
456 db1_printf(("Bailing out...\n"));
457 return (error);
458 }
459 }
460 /* Prevent this unit from being unconfigured while open. */
461 switch (fmt) {
462 case S_IFCHR:
463 rs->sc_dkdev.dk_copenmask |= pmask;
464 break;
465
466 case S_IFBLK:
467 rs->sc_dkdev.dk_bopenmask |= pmask;
468 break;
469 }
470
471 if ((rs->sc_dkdev.dk_openmask == 0) &&
472 ((rs->sc_flags & RAIDF_INITED) != 0)) {
473 /* First one... mark things as dirty... Note that we *MUST*
474 have done a configure before this. I DO NOT WANT TO BE
475 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
476 THAT THEY BELONG TOGETHER!!!!! */
477 /* XXX should check to see if we're only open for reading
478 here... If so, we needn't do this, but then need some
479 other way of keeping track of what's happened.. */
480
481 rf_markalldirty( raidPtrs[unit] );
482 }
483
484
485 rs->sc_dkdev.dk_openmask =
486 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
487
488 raidunlock(rs);
489
490 return (error);
491
492
493 }
494 /* ARGSUSED */
495 int
496 raidclose(dev, flags, fmt, p)
497 dev_t dev;
498 int flags, fmt;
499 struct proc *p;
500 {
501 int unit = raidunit(dev);
502 struct raid_softc *rs;
503 int error = 0;
504 int part;
505
506 if (unit >= numraid)
507 return (ENXIO);
508 rs = &raid_softc[unit];
509
510 if ((error = raidlock(rs)) != 0)
511 return (error);
512
513 part = DISKPART(dev);
514
515 /* ...that much closer to allowing unconfiguration... */
516 switch (fmt) {
517 case S_IFCHR:
518 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
519 break;
520
521 case S_IFBLK:
522 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
523 break;
524 }
525 rs->sc_dkdev.dk_openmask =
526 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
527
528 if ((rs->sc_dkdev.dk_openmask == 0) &&
529 ((rs->sc_flags & RAIDF_INITED) != 0)) {
530 /* Last one... device is not unconfigured yet.
531 Device shutdown has taken care of setting the
532 clean bits if RAIDF_INITED is not set
533 mark things as clean... */
534 rf_update_component_labels( raidPtrs[unit] );
535 }
536
537 raidunlock(rs);
538 return (0);
539
540 }
541
542 void
543 raidstrategy(bp)
544 register struct buf *bp;
545 {
546 register int s;
547
548 unsigned int raidID = raidunit(bp->b_dev);
549 RF_Raid_t *raidPtr;
550 struct raid_softc *rs = &raid_softc[raidID];
551 struct disklabel *lp;
552 int wlabel;
553
554 #if 0
555 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
556 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
557 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
558 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
559 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
560
561 if (bp->b_flags & B_READ)
562 db1_printf(("READ\n"));
563 else
564 db1_printf(("WRITE\n"));
565 #endif
566 if ((rs->sc_flags & RAIDF_INITED) ==0) {
567 bp->b_error = ENXIO;
568 bp->b_flags = B_ERROR;
569 bp->b_resid = bp->b_bcount;
570 biodone(bp);
571 return;
572 }
573 if (raidID >= numraid || !raidPtrs[raidID]) {
574 bp->b_error = ENODEV;
575 bp->b_flags |= B_ERROR;
576 bp->b_resid = bp->b_bcount;
577 biodone(bp);
578 return;
579 }
580 raidPtr = raidPtrs[raidID];
581 if (!raidPtr->valid) {
582 bp->b_error = ENODEV;
583 bp->b_flags |= B_ERROR;
584 bp->b_resid = bp->b_bcount;
585 biodone(bp);
586 return;
587 }
588 if (bp->b_bcount == 0) {
589 db1_printf(("b_bcount is zero..\n"));
590 biodone(bp);
591 return;
592 }
593 lp = rs->sc_dkdev.dk_label;
594
595 /*
596 * Do bounds checking and adjust transfer. If there's an
597 * error, the bounds check will flag that for us.
598 */
599
600 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
601 if (DISKPART(bp->b_dev) != RAW_PART)
602 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
603 db1_printf(("Bounds check failed!!:%d %d\n",
604 (int) bp->b_blkno, (int) wlabel));
605 biodone(bp);
606 return;
607 }
608 s = splbio(); /* XXX Needed? */
609 db1_printf(("Beginning strategy...\n"));
610
611 bp->b_resid = 0;
612 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
613 NULL, NULL, NULL);
614 if (bp->b_error) {
615 bp->b_flags |= B_ERROR;
616 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
617 bp->b_error));
618 }
619 splx(s);
620 #if 0
621 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
622 bp, bp->b_data,
623 (int) bp->b_bcount, (int) bp->b_resid));
624 #endif
625 }
626 /* ARGSUSED */
627 int
628 raidread(dev, uio, flags)
629 dev_t dev;
630 struct uio *uio;
631 int flags;
632 {
633 int unit = raidunit(dev);
634 struct raid_softc *rs;
635 int part;
636
637 if (unit >= numraid)
638 return (ENXIO);
639 rs = &raid_softc[unit];
640
641 if ((rs->sc_flags & RAIDF_INITED) == 0)
642 return (ENXIO);
643 part = DISKPART(dev);
644
645 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
646
647 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
648
649 }
650 /* ARGSUSED */
651 int
652 raidwrite(dev, uio, flags)
653 dev_t dev;
654 struct uio *uio;
655 int flags;
656 {
657 int unit = raidunit(dev);
658 struct raid_softc *rs;
659
660 if (unit >= numraid)
661 return (ENXIO);
662 rs = &raid_softc[unit];
663
664 if ((rs->sc_flags & RAIDF_INITED) == 0)
665 return (ENXIO);
666 db1_printf(("raidwrite\n"));
667 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
668
669 }
670
671 int
672 raidioctl(dev, cmd, data, flag, p)
673 dev_t dev;
674 u_long cmd;
675 caddr_t data;
676 int flag;
677 struct proc *p;
678 {
679 int unit = raidunit(dev);
680 int error = 0;
681 int part, pmask;
682 struct raid_softc *rs;
683 RF_Config_t *k_cfg, *u_cfg;
684 u_char *specific_buf;
685 int retcode = 0;
686 int row;
687 int column;
688 int s;
689 struct rf_recon_req *rrcopy, *rr;
690 RF_ComponentLabel_t *component_label;
691 RF_ComponentLabel_t ci_label;
692 RF_ComponentLabel_t **c_label_ptr;
693 RF_SingleComponent_t *sparePtr,*componentPtr;
694 RF_SingleComponent_t hot_spare;
695 RF_SingleComponent_t component;
696
697 if (unit >= numraid)
698 return (ENXIO);
699 rs = &raid_softc[unit];
700
701 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
702 (int) DISKPART(dev), (int) unit, (int) cmd));
703
704 /* Must be open for writes for these commands... */
705 switch (cmd) {
706 case DIOCSDINFO:
707 case DIOCWDINFO:
708 case DIOCWLABEL:
709 if ((flag & FWRITE) == 0)
710 return (EBADF);
711 }
712
713 /* Must be initialized for these... */
714 switch (cmd) {
715 case DIOCGDINFO:
716 case DIOCSDINFO:
717 case DIOCWDINFO:
718 case DIOCGPART:
719 case DIOCWLABEL:
720 case DIOCGDEFLABEL:
721 case RAIDFRAME_SHUTDOWN:
722 case RAIDFRAME_REWRITEPARITY:
723 case RAIDFRAME_GET_INFO:
724 case RAIDFRAME_RESET_ACCTOTALS:
725 case RAIDFRAME_GET_ACCTOTALS:
726 case RAIDFRAME_KEEP_ACCTOTALS:
727 case RAIDFRAME_GET_SIZE:
728 case RAIDFRAME_FAIL_DISK:
729 case RAIDFRAME_COPYBACK:
730 case RAIDFRAME_CHECKRECON:
731 case RAIDFRAME_GET_COMPONENT_LABEL:
732 case RAIDFRAME_SET_COMPONENT_LABEL:
733 case RAIDFRAME_ADD_HOT_SPARE:
734 case RAIDFRAME_REMOVE_HOT_SPARE:
735 case RAIDFRAME_INIT_LABELS:
736 case RAIDFRAME_REBUILD_IN_PLACE:
737 case RAIDFRAME_CHECK_PARITY:
738 if ((rs->sc_flags & RAIDF_INITED) == 0)
739 return (ENXIO);
740 }
741
742 switch (cmd) {
743
744
745 /* configure the system */
746 case RAIDFRAME_CONFIGURE:
747
748 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
749 /* copy-in the configuration information */
750 /* data points to a pointer to the configuration structure */
751 u_cfg = *((RF_Config_t **) data);
752 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
753 if (k_cfg == NULL) {
754 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
755 return (ENOMEM);
756 }
757 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
758 sizeof(RF_Config_t));
759 if (retcode) {
760 RF_Free(k_cfg, sizeof(RF_Config_t));
761 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
762 retcode));
763 return (retcode);
764 }
765 /* allocate a buffer for the layout-specific data, and copy it
766 * in */
767 if (k_cfg->layoutSpecificSize) {
768 if (k_cfg->layoutSpecificSize > 10000) {
769 /* sanity check */
770 RF_Free(k_cfg, sizeof(RF_Config_t));
771 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
772 return (EINVAL);
773 }
774 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
775 (u_char *));
776 if (specific_buf == NULL) {
777 RF_Free(k_cfg, sizeof(RF_Config_t));
778 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
779 return (ENOMEM);
780 }
781 retcode = copyin(k_cfg->layoutSpecific,
782 (caddr_t) specific_buf,
783 k_cfg->layoutSpecificSize);
784 if (retcode) {
785 RF_Free(k_cfg, sizeof(RF_Config_t));
786 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
787 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
788 retcode));
789 return (retcode);
790 }
791 } else
792 specific_buf = NULL;
793 k_cfg->layoutSpecific = specific_buf;
794
795 /* should do some kind of sanity check on the configuration.
796 * Store the sum of all the bytes in the last byte? */
797
798 /* configure the system */
799
800 raidPtrs[unit]->raidid = unit;
801
802 retcode = rf_Configure(raidPtrs[unit], k_cfg);
803
804 /* allow this many simultaneous IO's to this RAID device */
805 raidPtrs[unit]->openings = RAIDOUTSTANDING;
806
807 if (retcode == 0) {
808 retcode = raidinit(dev, raidPtrs[unit], unit);
809 rf_markalldirty( raidPtrs[unit] );
810 }
811 /* free the buffers. No return code here. */
812 if (k_cfg->layoutSpecificSize) {
813 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
814 }
815 RF_Free(k_cfg, sizeof(RF_Config_t));
816
817 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
818 retcode));
819
820 return (retcode);
821
822 /* shutdown the system */
823 case RAIDFRAME_SHUTDOWN:
824
825 if ((error = raidlock(rs)) != 0)
826 return (error);
827
828 /*
829 * If somebody has a partition mounted, we shouldn't
830 * shutdown.
831 */
832
833 part = DISKPART(dev);
834 pmask = (1 << part);
835 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
836 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
837 (rs->sc_dkdev.dk_copenmask & pmask))) {
838 raidunlock(rs);
839 return (EBUSY);
840 }
841
842 if (rf_debugKernelAccess) {
843 printf("call shutdown\n");
844 }
845
846 retcode = rf_Shutdown(raidPtrs[unit]);
847
848 db1_printf(("Done main shutdown\n"));
849
850 pool_destroy(&rs->sc_cbufpool);
851 db1_printf(("Done freeing component buffer freelist\n"));
852
853 /* It's no longer initialized... */
854 rs->sc_flags &= ~RAIDF_INITED;
855
856 /* Detach the disk. */
857 disk_detach(&rs->sc_dkdev);
858
859 raidunlock(rs);
860
861 return (retcode);
862 case RAIDFRAME_GET_COMPONENT_LABEL:
863 c_label_ptr = (RF_ComponentLabel_t **) data;
864 /* need to read the component label for the disk indicated
865 by row,column in component_label
866 XXX need to sanity check these values!!!
867 */
868
869 /* For practice, let's get it directly fromdisk, rather
870 than from the in-core copy */
871 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
872 (RF_ComponentLabel_t *));
873 if (component_label == NULL)
874 return (ENOMEM);
875
876 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
877
878 retcode = copyin( *c_label_ptr, component_label,
879 sizeof(RF_ComponentLabel_t));
880
881 if (retcode) {
882 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
883 return(retcode);
884 }
885
886 row = component_label->row;
887 column = component_label->column;
888
889 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
890 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
891 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
892 return(EINVAL);
893 }
894
895 raidread_component_label(
896 raidPtrs[unit]->Disks[row][column].dev,
897 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
898 component_label );
899
900 retcode = copyout((caddr_t) component_label,
901 (caddr_t) *c_label_ptr,
902 sizeof(RF_ComponentLabel_t));
903 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
904 return (retcode);
905
906 case RAIDFRAME_SET_COMPONENT_LABEL:
907 component_label = (RF_ComponentLabel_t *) data;
908
909 /* XXX check the label for valid stuff... */
910 /* Note that some things *should not* get modified --
911 the user should be re-initing the labels instead of
912 trying to patch things.
913 */
914
915 printf("Got component label:\n");
916 printf("Version: %d\n",component_label->version);
917 printf("Serial Number: %d\n",component_label->serial_number);
918 printf("Mod counter: %d\n",component_label->mod_counter);
919 printf("Row: %d\n", component_label->row);
920 printf("Column: %d\n", component_label->column);
921 printf("Num Rows: %d\n", component_label->num_rows);
922 printf("Num Columns: %d\n", component_label->num_columns);
923 printf("Clean: %d\n", component_label->clean);
924 printf("Status: %d\n", component_label->status);
925
926 row = component_label->row;
927 column = component_label->column;
928
929 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
930 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
931 return(EINVAL);
932 }
933
934 /* XXX this isn't allowed to do anything for now :-) */
935 #if 0
936 raidwrite_component_label(
937 raidPtrs[unit]->Disks[row][column].dev,
938 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
939 component_label );
940 #endif
941 return (0);
942
943 case RAIDFRAME_INIT_LABELS:
944 component_label = (RF_ComponentLabel_t *) data;
945 /*
946 we only want the serial number from
947 the above. We get all the rest of the information
948 from the config that was used to create this RAID
949 set.
950 */
951
952 raidPtrs[unit]->serial_number = component_label->serial_number;
953 /* current version number */
954 ci_label.version = RF_COMPONENT_LABEL_VERSION;
955 ci_label.serial_number = component_label->serial_number;
956 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
957 ci_label.num_rows = raidPtrs[unit]->numRow;
958 ci_label.num_columns = raidPtrs[unit]->numCol;
959 ci_label.clean = RF_RAID_DIRTY; /* not clean */
960 ci_label.status = rf_ds_optimal; /* "It's good!" */
961
962 for(row=0;row<raidPtrs[unit]->numRow;row++) {
963 ci_label.row = row;
964 for(column=0;column<raidPtrs[unit]->numCol;column++) {
965 ci_label.column = column;
966 raidwrite_component_label(
967 raidPtrs[unit]->Disks[row][column].dev,
968 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
969 &ci_label );
970 }
971 }
972
973 return (retcode);
974
975 /* initialize all parity */
976 case RAIDFRAME_REWRITEPARITY:
977
978 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
979 /* Parity for RAID 0 is trivially correct */
980 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
981 return(0);
982 }
983
984 /* borrow the thread of the requesting process */
985
986 s = splbio();
987 retcode = rf_RewriteParity(raidPtrs[unit]);
988 splx(s);
989 /* return I/O Error if the parity rewrite fails */
990
991 if (retcode) {
992 retcode = EIO;
993 } else {
994 /* set the clean bit! If we shutdown correctly,
995 the clean bit on each component label will get
996 set */
997 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
998 }
999 return (retcode);
1000
1001
1002 case RAIDFRAME_ADD_HOT_SPARE:
1003 sparePtr = (RF_SingleComponent_t *) data;
1004 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1005 printf("Adding spare\n");
1006 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1007 return(retcode);
1008
1009 case RAIDFRAME_REMOVE_HOT_SPARE:
1010 return(retcode);
1011
1012 case RAIDFRAME_REBUILD_IN_PLACE:
1013
1014 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1015 /* Can't do this on a RAID 0!! */
1016 return(EINVAL);
1017 }
1018
1019 componentPtr = (RF_SingleComponent_t *) data;
1020 memcpy( &component, componentPtr,
1021 sizeof(RF_SingleComponent_t));
1022 row = component.row;
1023 column = component.column;
1024 printf("Rebuild: %d %d\n",row, column);
1025 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1026 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1027 return(EINVAL);
1028 }
1029 printf("Attempting a rebuild in place\n");
1030 s = splbio();
1031 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1032 splx(s);
1033 return(retcode);
1034
1035 case RAIDFRAME_GET_INFO:
1036 {
1037 RF_Raid_t *raid = raidPtrs[unit];
1038 RF_DeviceConfig_t *cfg, **ucfgp;
1039 int i, j, d;
1040
1041 if (!raid->valid)
1042 return (ENODEV);
1043 ucfgp = (RF_DeviceConfig_t **) data;
1044 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1045 (RF_DeviceConfig_t *));
1046 if (cfg == NULL)
1047 return (ENOMEM);
1048 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1049 cfg->rows = raid->numRow;
1050 cfg->cols = raid->numCol;
1051 cfg->ndevs = raid->numRow * raid->numCol;
1052 if (cfg->ndevs >= RF_MAX_DISKS) {
1053 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1054 return (ENOMEM);
1055 }
1056 cfg->nspares = raid->numSpare;
1057 if (cfg->nspares >= RF_MAX_DISKS) {
1058 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1059 return (ENOMEM);
1060 }
1061 cfg->maxqdepth = raid->maxQueueDepth;
1062 d = 0;
1063 for (i = 0; i < cfg->rows; i++) {
1064 for (j = 0; j < cfg->cols; j++) {
1065 cfg->devs[d] = raid->Disks[i][j];
1066 d++;
1067 }
1068 }
1069 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1070 cfg->spares[i] = raid->Disks[0][j];
1071 }
1072 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1073 sizeof(RF_DeviceConfig_t));
1074 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1075
1076 return (retcode);
1077 }
1078 break;
1079 case RAIDFRAME_CHECK_PARITY:
1080 *(int *) data = raidPtrs[unit]->parity_good;
1081 return (0);
1082 case RAIDFRAME_RESET_ACCTOTALS:
1083 {
1084 RF_Raid_t *raid = raidPtrs[unit];
1085
1086 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1087 return (0);
1088 }
1089 break;
1090
1091 case RAIDFRAME_GET_ACCTOTALS:
1092 {
1093 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1094 RF_Raid_t *raid = raidPtrs[unit];
1095
1096 *totals = raid->acc_totals;
1097 return (0);
1098 }
1099 break;
1100
1101 case RAIDFRAME_KEEP_ACCTOTALS:
1102 {
1103 RF_Raid_t *raid = raidPtrs[unit];
1104 int *keep = (int *) data;
1105
1106 raid->keep_acc_totals = *keep;
1107 return (0);
1108 }
1109 break;
1110
1111 case RAIDFRAME_GET_SIZE:
1112 *(int *) data = raidPtrs[unit]->totalSectors;
1113 return (0);
1114
1115 #define RAIDFRAME_RECON 1
1116 /* XXX The above should probably be set somewhere else!! GO */
1117 #if RAIDFRAME_RECON > 0
1118
1119 /* fail a disk & optionally start reconstruction */
1120 case RAIDFRAME_FAIL_DISK:
1121
1122 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1123 /* Can't do this on a RAID 0!! */
1124 return(EINVAL);
1125 }
1126
1127 rr = (struct rf_recon_req *) data;
1128
1129 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1130 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1131 return (EINVAL);
1132
1133 printf("raid%d: Failing the disk: row: %d col: %d\n",
1134 unit, rr->row, rr->col);
1135
1136 /* make a copy of the recon request so that we don't rely on
1137 * the user's buffer */
1138 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1139 bcopy(rr, rrcopy, sizeof(*rr));
1140 rrcopy->raidPtr = (void *) raidPtrs[unit];
1141
1142 LOCK_RECON_Q_MUTEX();
1143 rrcopy->next = recon_queue;
1144 recon_queue = rrcopy;
1145 wakeup(&recon_queue);
1146 UNLOCK_RECON_Q_MUTEX();
1147
1148 return (0);
1149
1150 /* invoke a copyback operation after recon on whatever disk
1151 * needs it, if any */
1152 case RAIDFRAME_COPYBACK:
1153
1154 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1155 /* This makes no sense on a RAID 0!! */
1156 return(EINVAL);
1157 }
1158
1159 /* borrow the current thread to get this done */
1160
1161 s = splbio();
1162 rf_CopybackReconstructedData(raidPtrs[unit]);
1163 splx(s);
1164 return (0);
1165
1166 /* return the percentage completion of reconstruction */
1167 case RAIDFRAME_CHECKRECON:
1168 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1169 /* This makes no sense on a RAID 0 */
1170 return(EINVAL);
1171 }
1172
1173 row = *(int *) data;
1174 if (row < 0 || row >= raidPtrs[unit]->numRow)
1175 return (EINVAL);
1176 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1177 *(int *) data = 100;
1178 else
1179 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1180 return (0);
1181
1182 /* the sparetable daemon calls this to wait for the kernel to
1183 * need a spare table. this ioctl does not return until a
1184 * spare table is needed. XXX -- calling mpsleep here in the
1185 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1186 * -- I should either compute the spare table in the kernel,
1187 * or have a different -- XXX XXX -- interface (a different
1188 * character device) for delivering the table -- XXX */
1189 #if 0
1190 case RAIDFRAME_SPARET_WAIT:
1191 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1192 while (!rf_sparet_wait_queue)
1193 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1194 waitreq = rf_sparet_wait_queue;
1195 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1196 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1197
1198 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1199
1200 RF_Free(waitreq, sizeof(*waitreq));
1201 return (0);
1202
1203
1204 /* wakes up a process waiting on SPARET_WAIT and puts an error
1205 * code in it that will cause the dameon to exit */
1206 case RAIDFRAME_ABORT_SPARET_WAIT:
1207 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1208 waitreq->fcol = -1;
1209 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1210 waitreq->next = rf_sparet_wait_queue;
1211 rf_sparet_wait_queue = waitreq;
1212 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1213 wakeup(&rf_sparet_wait_queue);
1214 return (0);
1215
1216 /* used by the spare table daemon to deliver a spare table
1217 * into the kernel */
1218 case RAIDFRAME_SEND_SPARET:
1219
1220 /* install the spare table */
1221 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1222
1223 /* respond to the requestor. the return status of the spare
1224 * table installation is passed in the "fcol" field */
1225 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1226 waitreq->fcol = retcode;
1227 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1228 waitreq->next = rf_sparet_resp_queue;
1229 rf_sparet_resp_queue = waitreq;
1230 wakeup(&rf_sparet_resp_queue);
1231 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1232
1233 return (retcode);
1234 #endif
1235
1236
1237 #endif /* RAIDFRAME_RECON > 0 */
1238
1239 default:
1240 break; /* fall through to the os-specific code below */
1241
1242 }
1243
1244 if (!raidPtrs[unit]->valid)
1245 return (EINVAL);
1246
1247 /*
1248 * Add support for "regular" device ioctls here.
1249 */
1250
1251 switch (cmd) {
1252 case DIOCGDINFO:
1253 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1254 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1255 break;
1256
1257 case DIOCGPART:
1258 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1259 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1260 ((struct partinfo *) data)->part =
1261 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1262 break;
1263
1264 case DIOCWDINFO:
1265 db1_printf(("DIOCWDINFO\n"));
1266 case DIOCSDINFO:
1267 db1_printf(("DIOCSDINFO\n"));
1268 if ((error = raidlock(rs)) != 0)
1269 return (error);
1270
1271 rs->sc_flags |= RAIDF_LABELLING;
1272
1273 error = setdisklabel(rs->sc_dkdev.dk_label,
1274 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1275 if (error == 0) {
1276 if (cmd == DIOCWDINFO)
1277 error = writedisklabel(RAIDLABELDEV(dev),
1278 raidstrategy, rs->sc_dkdev.dk_label,
1279 rs->sc_dkdev.dk_cpulabel);
1280 }
1281 rs->sc_flags &= ~RAIDF_LABELLING;
1282
1283 raidunlock(rs);
1284
1285 if (error)
1286 return (error);
1287 break;
1288
1289 case DIOCWLABEL:
1290 db1_printf(("DIOCWLABEL\n"));
1291 if (*(int *) data != 0)
1292 rs->sc_flags |= RAIDF_WLABEL;
1293 else
1294 rs->sc_flags &= ~RAIDF_WLABEL;
1295 break;
1296
1297 case DIOCGDEFLABEL:
1298 db1_printf(("DIOCGDEFLABEL\n"));
1299 raidgetdefaultlabel(raidPtrs[unit], rs,
1300 (struct disklabel *) data);
1301 break;
1302
1303 default:
1304 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1305 }
1306 return (retcode);
1307
1308 }
1309
1310
1311 /* raidinit -- complete the rest of the initialization for the
1312 RAIDframe device. */
1313
1314
1315 static int
1316 raidinit(dev, raidPtr, unit)
1317 dev_t dev;
1318 RF_Raid_t *raidPtr;
1319 int unit;
1320 {
1321 int retcode;
1322 /* int ix; */
1323 /* struct raidbuf *raidbp; */
1324 struct raid_softc *rs;
1325
1326 retcode = 0;
1327
1328 rs = &raid_softc[unit];
1329 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1330 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1331
1332
1333 /* XXX should check return code first... */
1334 rs->sc_flags |= RAIDF_INITED;
1335
1336 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1337
1338 rs->sc_dkdev.dk_name = rs->sc_xname;
1339
1340 /* disk_attach actually creates space for the CPU disklabel, among
1341 * other things, so it's critical to call this *BEFORE* we try putzing
1342 * with disklabels. */
1343
1344 disk_attach(&rs->sc_dkdev);
1345
1346 /* XXX There may be a weird interaction here between this, and
1347 * protectedSectors, as used in RAIDframe. */
1348
1349 rs->sc_size = raidPtr->totalSectors;
1350 rs->sc_dev = dev;
1351
1352 return (retcode);
1353 }
1354
1355 /*
1356 * This kernel thread never exits. It is created once, and persists
1357 * until the system reboots.
1358 */
1359
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;	/* request popped off recon_queue */
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* NOTE(review): the mutex is dropped before
			 * tsleep(), so a wakeup() issued in that window
			 * could in principle be missed -- confirm the
			 * spl/mutex discipline makes that impossible. */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			       "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		/* pop the head of the singly-linked request list */
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* the request was malloc'd by the FAIL_DISK ioctl; we own
		 * it now and must free it */
		RF_Free(req, sizeof(*req));
	}
}
1394 /* wake up the daemon & tell it to get us a spare table
1395 * XXX
1396 * the entries in the queues should be tagged with the raidPtr
1397 * so that in the extremely rare case that two recons happen at once,
1398 * we know for which device were requesting a spare table
1399 * XXX
1400 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* hand our request to the daemon blocked in RAIDFRAME_SPARET_WAIT */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): that comment looks stale -- tsleep() does NOT
	 * release rf_sparet_wait_mutex the way mpsleep() did, so we seem
	 * to sleep while holding it.  Confirm what RF_LOCK_MUTEX expands
	 * to before relying on this. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* pop the daemon's response off the response queue */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* the daemon passes the installation status back in "fcol" */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1426 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1427 * bp & passes it down.
1428 * any calls originating in the kernel must use non-blocking I/O
1429 * do some extra sanity checking to return "appropriate" error values for
1430 * certain conditions (to make some standard utilities work)
1431 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void (*cbFunc) (struct buf *);
	void *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!!  */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
			pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* whole sectors in the transfer, plus one partial sector if the
	 * byte count isn't sector aligned */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	/* reject accesses past the end of the array; the extra (sum < x)
	 * comparisons catch wrap-around of the unsigned sum */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* partial-sector transfers are not supported */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Put a throttle on the number of requests we handle simultanously */

	RF_LOCK_MUTEX(raidPtr->mutex);

	/* block until an "opening" frees up; KernelWakeupFunc's caller is
	 * expected to wake this channel when openings is bumped */
	while(raidPtr->openings <= 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		(void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	raidPtr->openings--;

	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.  always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
		bp->b_data, (int) bp->b_resid));
#endif

	return (retcode);
}
1538 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1539
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* wrapper buf from the per-unit pool; freed in KernelWakeupFunc */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately -- no physical I/O for a NOP */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* aim the wrapper buf at the component device/vnode */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must account for themselves on the vnode */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
1654 /* this is the callback function associated with a I/O invoked from
1655 kernel code.
1656 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the wrapper raidbuf we dispatched */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* the original buf handed to rf_DispatchKernelIO */
	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate a component I/O error to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* charge the elapsed time to the trace record, if any */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the wrapper buf to the pool it came from */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* notify the queue and the upper layers; second arg is 1 on error */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1755
1756
1757
1758 /*
1759 * initialize a buf structure for doing an I/O in the kernel.
1760 */
1761 static void
1762 InitBP(
1763 struct buf * bp,
1764 struct vnode * b_vp,
1765 unsigned rw_flag,
1766 dev_t dev,
1767 RF_SectorNum_t startSect,
1768 RF_SectorCount_t numSect,
1769 caddr_t buf,
1770 void (*cbFunc) (struct buf *),
1771 void *cbArg,
1772 int logBytesPerSector,
1773 struct proc * b_proc)
1774 {
1775 /* bp->b_flags = B_PHYS | rw_flag; */
1776 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1777 bp->b_bcount = numSect << logBytesPerSector;
1778 bp->b_bufsize = bp->b_bcount;
1779 bp->b_error = 0;
1780 bp->b_dev = dev;
1781 db1_printf(("bp->b_dev is %d\n", dev));
1782 bp->b_un.b_addr = buf;
1783 #if 0
1784 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1785 #endif
1786 bp->b_blkno = startSect;
1787 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1788 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1789 if (bp->b_bcount == 0) {
1790 panic("bp->b_bcount is zero in InitBP!!\n");
1791 }
1792 bp->b_proc = b_proc;
1793 bp->b_iodone = cbFunc;
1794 bp->b_vp = b_vp;
1795
1796 }
1797
1798 static void
1799 raidgetdefaultlabel(raidPtr, rs, lp)
1800 RF_Raid_t *raidPtr;
1801 struct raid_softc *rs;
1802 struct disklabel *lp;
1803 {
1804 db1_printf(("Building a default label...\n"));
1805 bzero(lp, sizeof(*lp));
1806
1807 /* fabricate a label... */
1808 lp->d_secperunit = raidPtr->totalSectors;
1809 lp->d_secsize = raidPtr->bytesPerSector;
1810 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1811 lp->d_ntracks = 1;
1812 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1813 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1814
1815 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1816 lp->d_type = DTYPE_RAID;
1817 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1818 lp->d_rpm = 3600;
1819 lp->d_interleave = 1;
1820 lp->d_flags = 0;
1821
1822 lp->d_partitions[RAW_PART].p_offset = 0;
1823 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1824 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1825 lp->d_npartitions = RAW_PART + 1;
1826
1827 lp->d_magic = DISKMAGIC;
1828 lp->d_magic2 = DISKMAGIC;
1829 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1830
1831 }
1832 /*
1833 * Read the disklabel from the raid device. If one is not present, fake one
1834 * up.
1835 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default; readdisklabel() below may
	 * overwrite it with an on-"disk" label */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		/* warn (but don't reject) if any partition runs past the
		 * end of the array */
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1889 /*
1890 * Take care of things one might want to take care of in the event
1891 * that a disklabel isn't present.
1892 */
1893 static void
1894 raidmakedisklabel(rs)
1895 struct raid_softc *rs;
1896 {
1897 struct disklabel *lp = rs->sc_dkdev.dk_label;
1898 db1_printf(("Making a label..\n"));
1899
1900 /*
1901 * For historical reasons, if there's no disklabel present
1902 * the raw partition must be marked FS_BSDFFS.
1903 */
1904
1905 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1906
1907 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1908
1909 lp->d_checksum = dkcksum(lp);
1910 }
1911 /*
1912 * Lookup the provided name in the filesystem. If the file exists,
1913 * is a valid block device, and isn't being used by anyone else,
1914 * set *vpp to the file's vnode.
1915 * You'll find the original of this in ccd.c
1916 */
1917 int
1918 raidlookup(path, p, vpp)
1919 char *path;
1920 struct proc *p;
1921 struct vnode **vpp; /* result */
1922 {
1923 struct nameidata nd;
1924 struct vnode *vp;
1925 struct vattr va;
1926 int error;
1927
1928 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1929 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1930 #ifdef DEBUG
1931 printf("RAIDframe: vn_open returned %d\n", error);
1932 #endif
1933 return (error);
1934 }
1935 vp = nd.ni_vp;
1936 if (vp->v_usecount > 1) {
1937 VOP_UNLOCK(vp, 0);
1938 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1939 return (EBUSY);
1940 }
1941 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1942 VOP_UNLOCK(vp, 0);
1943 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1944 return (error);
1945 }
1946 /* XXX: eventually we should handle VREG, too. */
1947 if (va.va_type != VBLK) {
1948 VOP_UNLOCK(vp, 0);
1949 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1950 return (ENOTBLK);
1951 }
1952 VOP_UNLOCK(vp, 0);
1953 *vpp = vp;
1954 return (0);
1955 }
1956 /*
1957 * Wait interruptibly for an exclusive lock.
1958 *
1959 * XXX
1960 * Several drivers do this; it should be abstracted and made MP-safe.
1961 * (Hmm... where have we seen this warning before :-> GO )
1962 */
1963 static int
1964 raidlock(rs)
1965 struct raid_softc *rs;
1966 {
1967 int error;
1968
1969 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1970 rs->sc_flags |= RAIDF_WANTED;
1971 if ((error =
1972 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1973 return (error);
1974 }
1975 rs->sc_flags |= RAIDF_LOCKED;
1976 return (0);
1977 }
1978 /*
1979 * Unlock and wake up any waiters.
1980 */
1981 static void
1982 raidunlock(rs)
1983 struct raid_softc *rs;
1984 {
1985
1986 rs->sc_flags &= ~RAIDF_LOCKED;
1987 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1988 rs->sc_flags &= ~RAIDF_WANTED;
1989 wakeup(rs);
1990 }
1991 }
1992
1993
1994 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1995 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1996
1997 int
1998 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1999 {
2000 RF_ComponentLabel_t component_label;
2001 raidread_component_label(dev, b_vp, &component_label);
2002 component_label.mod_counter = mod_counter;
2003 component_label.clean = RF_RAID_CLEAN;
2004 raidwrite_component_label(dev, b_vp, &component_label);
2005 return(0);
2006 }
2007
2008
2009 int
2010 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2011 {
2012 RF_ComponentLabel_t component_label;
2013 raidread_component_label(dev, b_vp, &component_label);
2014 component_label.mod_counter = mod_counter;
2015 component_label.clean = RF_RAID_DIRTY;
2016 raidwrite_component_label(dev, b_vp, &component_label);
2017 return(0);
2018 }
2019
2020 /* ARGSUSED */
/*
 * Read the RAIDframe component label from `dev' into *component_label.
 * Returns 0 on success, or the error from biowait().
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_bcount is in bytes but this b_resid value is a
	 * sector count -- looks inconsistent; confirm what the component
	 * strategy routine expects here. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read straight through the component's block device */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* the buffer came from geteblk(); make sure it isn't cached */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2070 /* ARGSUSED */
/*
 * Write *component_label to the component-label area of `dev'.
 * The rest of the label sector is zero-filled.  Returns 0 on success,
 * or the error from biowait().
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid here is a sector count while b_bcount is
	 * bytes -- same oddity as the read side; confirm intent. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-pad the full label area, then drop the label at the front */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* issue the write straight through the component's block device */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* the buffer came from geteblk(); make sure it isn't cached */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2104
2105 void
2106 rf_markalldirty( raidPtr )
2107 RF_Raid_t *raidPtr;
2108 {
2109 RF_ComponentLabel_t c_label;
2110 int r,c;
2111
2112 raidPtr->mod_counter++;
2113 for (r = 0; r < raidPtr->numRow; r++) {
2114 for (c = 0; c < raidPtr->numCol; c++) {
2115 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2116 raidread_component_label(
2117 raidPtr->Disks[r][c].dev,
2118 raidPtr->raid_cinfo[r][c].ci_vp,
2119 &c_label);
2120 if (c_label.status == rf_ds_spared) {
2121 /* XXX do something special...
2122 but whatever you do, don't
2123 try to access it!! */
2124 } else {
2125 #if 0
2126 c_label.status =
2127 raidPtr->Disks[r][c].status;
2128 raidwrite_component_label(
2129 raidPtr->Disks[r][c].dev,
2130 raidPtr->raid_cinfo[r][c].ci_vp,
2131 &c_label);
2132 #endif
2133 raidmarkdirty(
2134 raidPtr->Disks[r][c].dev,
2135 raidPtr->raid_cinfo[r][c].ci_vp,
2136 raidPtr->mod_counter);
2137 }
2138 }
2139 }
2140 }
2141 /* printf("Component labels marked dirty.\n"); */
2142 #if 0
2143 for( c = 0; c < raidPtr->numSpare ; c++) {
2144 sparecol = raidPtr->numCol + c;
2145 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2146 /*
2147
2148 XXX this is where we get fancy and map this spare
2149 into it's correct spot in the array.
2150
2151 */
2152 /*
2153
2154 we claim this disk is "optimal" if it's
2155 rf_ds_used_spare, as that means it should be
2156 directly substitutable for the disk it replaced.
2157 We note that too...
2158
2159 */
2160
2161 for(i=0;i<raidPtr->numRow;i++) {
2162 for(j=0;j<raidPtr->numCol;j++) {
2163 if ((raidPtr->Disks[i][j].spareRow ==
2164 r) &&
2165 (raidPtr->Disks[i][j].spareCol ==
2166 sparecol)) {
2167 srow = r;
2168 scol = sparecol;
2169 break;
2170 }
2171 }
2172 }
2173
2174 raidread_component_label(
2175 raidPtr->Disks[r][sparecol].dev,
2176 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2177 &c_label);
2178 /* make sure status is noted */
2179 c_label.version = RF_COMPONENT_LABEL_VERSION;
2180 c_label.mod_counter = raidPtr->mod_counter;
2181 c_label.serial_number = raidPtr->serial_number;
2182 c_label.row = srow;
2183 c_label.column = scol;
2184 c_label.num_rows = raidPtr->numRow;
2185 c_label.num_columns = raidPtr->numCol;
2186 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2187 c_label.status = rf_ds_optimal;
2188 raidwrite_component_label(
2189 raidPtr->Disks[r][sparecol].dev,
2190 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2191 &c_label);
2192 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2193 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2194 }
2195 }
2196
2197 #endif
2198 }
2199
2200
2201 void
2202 rf_update_component_labels( raidPtr )
2203 RF_Raid_t *raidPtr;
2204 {
2205 RF_ComponentLabel_t c_label;
2206 int sparecol;
2207 int r,c;
2208 int i,j;
2209 int srow, scol;
2210
2211 srow = -1;
2212 scol = -1;
2213
2214 /* XXX should do extra checks to make sure things really are clean,
2215 rather than blindly setting the clean bit... */
2216
2217 raidPtr->mod_counter++;
2218
2219 for (r = 0; r < raidPtr->numRow; r++) {
2220 for (c = 0; c < raidPtr->numCol; c++) {
2221 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2222 raidread_component_label(
2223 raidPtr->Disks[r][c].dev,
2224 raidPtr->raid_cinfo[r][c].ci_vp,
2225 &c_label);
2226 /* make sure status is noted */
2227 c_label.status = rf_ds_optimal;
2228 raidwrite_component_label(
2229 raidPtr->Disks[r][c].dev,
2230 raidPtr->raid_cinfo[r][c].ci_vp,
2231 &c_label);
2232 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2233 raidmarkclean(
2234 raidPtr->Disks[r][c].dev,
2235 raidPtr->raid_cinfo[r][c].ci_vp,
2236 raidPtr->mod_counter);
2237 }
2238 }
2239 /* else we don't touch it.. */
2240 #if 0
2241 else if (raidPtr->Disks[r][c].status !=
2242 rf_ds_failed) {
2243 raidread_component_label(
2244 raidPtr->Disks[r][c].dev,
2245 raidPtr->raid_cinfo[r][c].ci_vp,
2246 &c_label);
2247 /* make sure status is noted */
2248 c_label.status =
2249 raidPtr->Disks[r][c].status;
2250 raidwrite_component_label(
2251 raidPtr->Disks[r][c].dev,
2252 raidPtr->raid_cinfo[r][c].ci_vp,
2253 &c_label);
2254 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2255 raidmarkclean(
2256 raidPtr->Disks[r][c].dev,
2257 raidPtr->raid_cinfo[r][c].ci_vp,
2258 raidPtr->mod_counter);
2259 }
2260 }
2261 #endif
2262 }
2263 }
2264
2265 for( c = 0; c < raidPtr->numSpare ; c++) {
2266 sparecol = raidPtr->numCol + c;
2267 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2268 /*
2269
2270 we claim this disk is "optimal" if it's
2271 rf_ds_used_spare, as that means it should be
2272 directly substitutable for the disk it replaced.
2273 We note that too...
2274
2275 */
2276
2277 for(i=0;i<raidPtr->numRow;i++) {
2278 for(j=0;j<raidPtr->numCol;j++) {
2279 if ((raidPtr->Disks[i][j].spareRow ==
2280 0) &&
2281 (raidPtr->Disks[i][j].spareCol ==
2282 sparecol)) {
2283 srow = i;
2284 scol = j;
2285 break;
2286 }
2287 }
2288 }
2289
2290 raidread_component_label(
2291 raidPtr->Disks[0][sparecol].dev,
2292 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2293 &c_label);
2294 /* make sure status is noted */
2295 c_label.version = RF_COMPONENT_LABEL_VERSION;
2296 c_label.mod_counter = raidPtr->mod_counter;
2297 c_label.serial_number = raidPtr->serial_number;
2298 c_label.row = srow;
2299 c_label.column = scol;
2300 c_label.num_rows = raidPtr->numRow;
2301 c_label.num_columns = raidPtr->numCol;
2302 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2303 c_label.status = rf_ds_optimal;
2304 raidwrite_component_label(
2305 raidPtr->Disks[0][sparecol].dev,
2306 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2307 &c_label);
2308 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2309 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2310 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2311 raidPtr->mod_counter);
2312 }
2313 }
2314 }
2315 /* printf("Component labels updated\n"); */
2316 }
2317