rf_netbsdkintf.c revision 1.28 1 /* $NetBSD: rf_netbsdkintf.c,v 1.28 1999/08/14 03:47:07 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
 * rf_netbsdkintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #define RFK_BOOT_NONE 0
157 #define RFK_BOOT_GOOD 1
158 #define RFK_BOOT_BAD 2
159 static int rf_kbooted = RFK_BOOT_NONE;
160
161 #ifdef DEBUG
162 #define db0_printf(a) printf a
163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
169 #else /* DEBUG */
170 #define db0_printf(a) printf a
171 #define db1_printf(a) { }
172 #define db2_printf(a) { }
173 #define db3_printf(a) { }
174 #define db4_printf(a) { }
175 #define db5_printf(a) { }
176 #endif /* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
204 #define Dprintf0(s) if (rf_queueDebug) \
205 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
206 #define Dprintf1(s,a) if (rf_queueDebug) \
207 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
208 #define Dprintf2(s,a,b) if (rf_queueDebug) \
209 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
211 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raidattach __P((int));
217 int raidsize __P((dev_t));
218
219 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
220 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
221 static int raidinit __P((dev_t, RF_Raid_t *, int));
222
223 int raidopen __P((dev_t, int, int, struct proc *));
224 int raidclose __P((dev_t, int, int, struct proc *));
225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
226 int raidwrite __P((dev_t, struct uio *, int));
227 int raidread __P((dev_t, struct uio *, int));
228 void raidstrategy __P((struct buf *));
229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
230
231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
233 void rf_update_component_labels( RF_Raid_t *);
234 /*
235 * Pilfered from ccd.c
236 */
237
238 struct raidbuf {
239 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
240 struct buf *rf_obp; /* ptr. to original I/O buf */
241 int rf_flags; /* misc. flags */
242 RF_DiskQueueData_t *req;/* the request that this was part of.. */
243 };
244
245
246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
247 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
250 or if it should be used in conjunction with that... */
251
252 struct raid_softc {
253 int sc_flags; /* flags */
254 int sc_cflags; /* configuration flags */
255 size_t sc_size; /* size of the raid device */
256 dev_t sc_dev; /* our device.. */
257 char sc_xname[20]; /* XXX external name */
258 struct disk sc_dkdev; /* generic disk device info */
259 struct pool sc_cbufpool; /* component buffer pool */
260 };
261 /* sc_flags */
262 #define RAIDF_INITED 0x01 /* unit has been initialized */
263 #define RAIDF_WLABEL 0x02 /* label area is writable */
264 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
265 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
266 #define RAIDF_LOCKED 0x80 /* unit is locked */
267
268 #define raidunit(x) DISKUNIT(x)
269 static int numraid = 0;
270
271 /*
272 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
273 * Be aware that large numbers can allow the driver to consume a lot of
274 * kernel memory, especially on writes, and in degraded mode reads.
275 *
276 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
277 * a single 64K write will typically require 64K for the old data,
278 * 64K for the old parity, and 64K for the new parity, for a total
279 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
281 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
282 *
283 * Now in degraded mode, for example, a 64K read on the above setup may
284 * require data reconstruction, which will require *all* of the 4 remaining
285 * disks to participate -- 4 * 32K/disk == 128K again.
286 */
287
288 #ifndef RAIDOUTSTANDING
289 #define RAIDOUTSTANDING 6
290 #endif
291
292 #define RAIDLABELDEV(dev) \
293 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
294
295 /* declared here, and made public, for the benefit of KVM stuff.. */
296 struct raid_softc *raid_softc;
297
298 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
299 struct disklabel *));
300 static void raidgetdisklabel __P((dev_t));
301 static void raidmakedisklabel __P((struct raid_softc *));
302
303 static int raidlock __P((struct raid_softc *));
304 static void raidunlock __P((struct raid_softc *));
305 int raidlookup __P((char *, struct proc * p, struct vnode **));
306
307 static void rf_markalldirty __P((RF_Raid_t *));
308
309 void
310 raidattach(num)
311 int num;
312 {
313 int raidID;
314 int i, rc;
315
316 #ifdef DEBUG
317 printf("raidattach: Asked for %d units\n", num);
318 #endif
319
320 if (num <= 0) {
321 #ifdef DIAGNOSTIC
322 panic("raidattach: count <= 0");
323 #endif
324 return;
325 }
326 /* This is where all the initialization stuff gets done. */
327
328 /* Make some space for requested number of units... */
329
330 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
331 if (raidPtrs == NULL) {
332 panic("raidPtrs is NULL!!\n");
333 }
334
335 rc = rf_mutex_init(&rf_sparet_wait_mutex);
336 if (rc) {
337 RF_PANIC();
338 }
339
340 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
341 recon_queue = NULL;
342
343 for (i = 0; i < numraid; i++)
344 raidPtrs[i] = NULL;
345 rc = rf_BootRaidframe();
346 if (rc == 0)
347 printf("Kernelized RAIDframe activated\n");
348 else
349 panic("Serious error booting RAID!!\n");
350
351 rf_kbooted = RFK_BOOT_GOOD;
352
353 /* put together some datastructures like the CCD device does.. This
354 * lets us lock the device and what-not when it gets opened. */
355
356 raid_softc = (struct raid_softc *)
357 malloc(num * sizeof(struct raid_softc),
358 M_RAIDFRAME, M_NOWAIT);
359 if (raid_softc == NULL) {
360 printf("WARNING: no memory for RAIDframe driver\n");
361 return;
362 }
363 numraid = num;
364 bzero(raid_softc, num * sizeof(struct raid_softc));
365
366 for (raidID = 0; raidID < num; raidID++) {
367 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
368 (RF_Raid_t *));
369 if (raidPtrs[raidID] == NULL) {
370 printf("raidPtrs[%d] is NULL\n", raidID);
371 }
372 }
373 }
374
375
376 int
377 raidsize(dev)
378 dev_t dev;
379 {
380 struct raid_softc *rs;
381 struct disklabel *lp;
382 int part, unit, omask, size;
383
384 unit = raidunit(dev);
385 if (unit >= numraid)
386 return (-1);
387 rs = &raid_softc[unit];
388
389 if ((rs->sc_flags & RAIDF_INITED) == 0)
390 return (-1);
391
392 part = DISKPART(dev);
393 omask = rs->sc_dkdev.dk_openmask & (1 << part);
394 lp = rs->sc_dkdev.dk_label;
395
396 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
397 return (-1);
398
399 if (lp->d_partitions[part].p_fstype != FS_SWAP)
400 size = -1;
401 else
402 size = lp->d_partitions[part].p_size *
403 (lp->d_secsize / DEV_BSIZE);
404
405 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
406 return (-1);
407
408 return (size);
409
410 }
411
412 int
413 raiddump(dev, blkno, va, size)
414 dev_t dev;
415 daddr_t blkno;
416 caddr_t va;
417 size_t size;
418 {
419 /* Not implemented. */
420 return ENXIO;
421 }
/* ARGSUSED */
/*
 * Open a partition of a RAID unit.
 *
 * Validates the unit and partition, records the open in the character/
 * block open masks (which prevents unconfiguration while open), and on
 * the very first open of a configured unit re-reads the disklabel and
 * marks all components dirty so an unclean shutdown can be detected.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against other open/close/configure operations. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		/*
		 * Note the short-circuit: if the unit is not configured
		 * the label has not been read, so lp is not consulted.
		 */
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	/* dk_openmask is still the pre-open value here, so ==0 means
	   this is the first opener of the unit. */
	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
501 /* ARGSUSED */
502 int
503 raidclose(dev, flags, fmt, p)
504 dev_t dev;
505 int flags, fmt;
506 struct proc *p;
507 {
508 int unit = raidunit(dev);
509 struct raid_softc *rs;
510 int error = 0;
511 int part;
512
513 if (unit >= numraid)
514 return (ENXIO);
515 rs = &raid_softc[unit];
516
517 if ((error = raidlock(rs)) != 0)
518 return (error);
519
520 part = DISKPART(dev);
521
522 /* ...that much closer to allowing unconfiguration... */
523 switch (fmt) {
524 case S_IFCHR:
525 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
526 break;
527
528 case S_IFBLK:
529 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
530 break;
531 }
532 rs->sc_dkdev.dk_openmask =
533 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
534
535 if ((rs->sc_dkdev.dk_openmask == 0) &&
536 ((rs->sc_flags & RAIDF_INITED) != 0)) {
537 /* Last one... device is not unconfigured yet.
538 Device shutdown has taken care of setting the
539 clean bits if RAIDF_INITED is not set
540 mark things as clean... */
541 rf_update_component_labels( raidPtrs[unit] );
542 }
543
544 raidunlock(rs);
545 return (0);
546
547 }
548
549 void
550 raidstrategy(bp)
551 register struct buf *bp;
552 {
553 register int s;
554
555 unsigned int raidID = raidunit(bp->b_dev);
556 RF_Raid_t *raidPtr;
557 struct raid_softc *rs = &raid_softc[raidID];
558 struct disklabel *lp;
559 int wlabel;
560
561 #if 0
562 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
563 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
564 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
565 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
566 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
567
568 if (bp->b_flags & B_READ)
569 db1_printf(("READ\n"));
570 else
571 db1_printf(("WRITE\n"));
572 #endif
573 if (rf_kbooted != RFK_BOOT_GOOD)
574 return;
575 if (raidID >= numraid || !raidPtrs[raidID]) {
576 bp->b_error = ENODEV;
577 bp->b_flags |= B_ERROR;
578 bp->b_resid = bp->b_bcount;
579 biodone(bp);
580 return;
581 }
582 raidPtr = raidPtrs[raidID];
583 if (!raidPtr->valid) {
584 bp->b_error = ENODEV;
585 bp->b_flags |= B_ERROR;
586 bp->b_resid = bp->b_bcount;
587 biodone(bp);
588 return;
589 }
590 if (bp->b_bcount == 0) {
591 db1_printf(("b_bcount is zero..\n"));
592 biodone(bp);
593 return;
594 }
595 lp = rs->sc_dkdev.dk_label;
596
597 /*
598 * Do bounds checking and adjust transfer. If there's an
599 * error, the bounds check will flag that for us.
600 */
601
602 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
603 if (DISKPART(bp->b_dev) != RAW_PART)
604 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
605 db1_printf(("Bounds check failed!!:%d %d\n",
606 (int) bp->b_blkno, (int) wlabel));
607 biodone(bp);
608 return;
609 }
610 s = splbio(); /* XXX Needed? */
611 db1_printf(("Beginning strategy...\n"));
612
613 bp->b_resid = 0;
614 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
615 NULL, NULL, NULL);
616 if (bp->b_error) {
617 bp->b_flags |= B_ERROR;
618 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
619 bp->b_error));
620 }
621 splx(s);
622 #if 0
623 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
624 bp, bp->b_data,
625 (int) bp->b_bcount, (int) bp->b_resid));
626 #endif
627 }
628 /* ARGSUSED */
629 int
630 raidread(dev, uio, flags)
631 dev_t dev;
632 struct uio *uio;
633 int flags;
634 {
635 int unit = raidunit(dev);
636 struct raid_softc *rs;
637 int part;
638
639 if (unit >= numraid)
640 return (ENXIO);
641 rs = &raid_softc[unit];
642
643 if ((rs->sc_flags & RAIDF_INITED) == 0)
644 return (ENXIO);
645 part = DISKPART(dev);
646
647 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
648
649 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
650
651 }
652 /* ARGSUSED */
653 int
654 raidwrite(dev, uio, flags)
655 dev_t dev;
656 struct uio *uio;
657 int flags;
658 {
659 int unit = raidunit(dev);
660 struct raid_softc *rs;
661
662 if (unit >= numraid)
663 return (ENXIO);
664 rs = &raid_softc[unit];
665
666 if ((rs->sc_flags & RAIDF_INITED) == 0)
667 return (ENXIO);
668 db1_printf(("raidwrite\n"));
669 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
670
671 }
672
673 int
674 raidioctl(dev, cmd, data, flag, p)
675 dev_t dev;
676 u_long cmd;
677 caddr_t data;
678 int flag;
679 struct proc *p;
680 {
681 int unit = raidunit(dev);
682 int error = 0;
683 int part, pmask;
684 struct raid_softc *rs;
685 #if 0
686 int r, c;
687 #endif
688 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
689
690 /* struct ccdbuf *cbp; */
691 /* struct raidbuf *raidbp; */
692 RF_Config_t *k_cfg, *u_cfg;
693 u_char *specific_buf;
694 int retcode = 0;
695 int row;
696 int column;
697 int s;
698 struct rf_recon_req *rrcopy, *rr;
699 RF_ComponentLabel_t *component_label;
700 RF_ComponentLabel_t ci_label;
701 RF_ComponentLabel_t **c_label_ptr;
702 RF_SingleComponent_t *sparePtr,*componentPtr;
703 RF_SingleComponent_t hot_spare;
704 RF_SingleComponent_t component;
705
706 if (unit >= numraid)
707 return (ENXIO);
708 rs = &raid_softc[unit];
709
710 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
711 (int) DISKPART(dev), (int) unit, (int) cmd));
712
713 /* Must be open for writes for these commands... */
714 switch (cmd) {
715 case DIOCSDINFO:
716 case DIOCWDINFO:
717 case DIOCWLABEL:
718 if ((flag & FWRITE) == 0)
719 return (EBADF);
720 }
721
722 /* Must be initialized for these... */
723 switch (cmd) {
724 case DIOCGDINFO:
725 case DIOCSDINFO:
726 case DIOCWDINFO:
727 case DIOCGPART:
728 case DIOCWLABEL:
729 case DIOCGDEFLABEL:
730 case RAIDFRAME_SHUTDOWN:
731 case RAIDFRAME_REWRITEPARITY:
732 case RAIDFRAME_GET_INFO:
733 case RAIDFRAME_RESET_ACCTOTALS:
734 case RAIDFRAME_GET_ACCTOTALS:
735 case RAIDFRAME_KEEP_ACCTOTALS:
736 case RAIDFRAME_GET_SIZE:
737 case RAIDFRAME_FAIL_DISK:
738 case RAIDFRAME_COPYBACK:
739 case RAIDFRAME_CHECKRECON:
740 case RAIDFRAME_GET_COMPONENT_LABEL:
741 case RAIDFRAME_SET_COMPONENT_LABEL:
742 case RAIDFRAME_ADD_HOT_SPARE:
743 case RAIDFRAME_REMOVE_HOT_SPARE:
744 case RAIDFRAME_INIT_LABELS:
745 case RAIDFRAME_REBUILD_IN_PLACE:
746 case RAIDFRAME_CHECK_PARITY:
747 if ((rs->sc_flags & RAIDF_INITED) == 0)
748 return (ENXIO);
749 }
750
751 switch (cmd) {
752
753
754 /* configure the system */
755 case RAIDFRAME_CONFIGURE:
756
757 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
758 /* copy-in the configuration information */
759 /* data points to a pointer to the configuration structure */
760 u_cfg = *((RF_Config_t **) data);
761 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
762 if (k_cfg == NULL) {
763 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
764 return (ENOMEM);
765 }
766 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
767 sizeof(RF_Config_t));
768 if (retcode) {
769 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
770 retcode));
771 return (retcode);
772 }
773 /* allocate a buffer for the layout-specific data, and copy it
774 * in */
775 if (k_cfg->layoutSpecificSize) {
776 if (k_cfg->layoutSpecificSize > 10000) {
777 /* sanity check */
778 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
779 return (EINVAL);
780 }
781 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
782 (u_char *));
783 if (specific_buf == NULL) {
784 RF_Free(k_cfg, sizeof(RF_Config_t));
785 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
786 return (ENOMEM);
787 }
788 retcode = copyin(k_cfg->layoutSpecific,
789 (caddr_t) specific_buf,
790 k_cfg->layoutSpecificSize);
791 if (retcode) {
792 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
793 retcode));
794 return (retcode);
795 }
796 } else
797 specific_buf = NULL;
798 k_cfg->layoutSpecific = specific_buf;
799
800 /* should do some kind of sanity check on the configuration.
801 * Store the sum of all the bytes in the last byte? */
802
803 /* configure the system */
804
805 raidPtrs[unit]->raidid = unit;
806
807 retcode = rf_Configure(raidPtrs[unit], k_cfg);
808
809 /* allow this many simultaneous IO's to this RAID device */
810 raidPtrs[unit]->openings = RAIDOUTSTANDING;
811
812 if (retcode == 0) {
813 retcode = raidinit(dev, raidPtrs[unit], unit);
814 rf_markalldirty( raidPtrs[unit] );
815 }
816 /* free the buffers. No return code here. */
817 if (k_cfg->layoutSpecificSize) {
818 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
819 }
820 RF_Free(k_cfg, sizeof(RF_Config_t));
821
822 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
823 retcode));
824
825 return (retcode);
826
827 /* shutdown the system */
828 case RAIDFRAME_SHUTDOWN:
829
830 if ((error = raidlock(rs)) != 0)
831 return (error);
832
833 /*
834 * If somebody has a partition mounted, we shouldn't
835 * shutdown.
836 */
837
838 part = DISKPART(dev);
839 pmask = (1 << part);
840 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
841 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
842 (rs->sc_dkdev.dk_copenmask & pmask))) {
843 raidunlock(rs);
844 return (EBUSY);
845 }
846
847 if (rf_debugKernelAccess) {
848 printf("call shutdown\n");
849 }
850
851 retcode = rf_Shutdown(raidPtrs[unit]);
852
853 db1_printf(("Done main shutdown\n"));
854
855 pool_destroy(&rs->sc_cbufpool);
856 db1_printf(("Done freeing component buffer freelist\n"));
857
858 /* It's no longer initialized... */
859 rs->sc_flags &= ~RAIDF_INITED;
860
861 /* Detach the disk. */
862 disk_detach(&rs->sc_dkdev);
863
864 raidunlock(rs);
865
866 return (retcode);
867 case RAIDFRAME_GET_COMPONENT_LABEL:
868 c_label_ptr = (RF_ComponentLabel_t **) data;
869 /* need to read the component label for the disk indicated
870 by row,column in component_label
871 XXX need to sanity check these values!!!
872 */
873
		/* For practice, let's get it directly from disk, rather
875 than from the in-core copy */
876 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
877 (RF_ComponentLabel_t *));
878 if (component_label == NULL)
879 return (ENOMEM);
880
881 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
882
883 retcode = copyin( *c_label_ptr, component_label,
884 sizeof(RF_ComponentLabel_t));
885
886 if (retcode) {
887 return(retcode);
888 }
889
890 row = component_label->row;
891 column = component_label->column;
892
893 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
894 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
895 return(EINVAL);
896 }
897
898 raidread_component_label(
899 raidPtrs[unit]->Disks[row][column].dev,
900 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
901 component_label );
902
903 retcode = copyout((caddr_t) component_label,
904 (caddr_t) *c_label_ptr,
905 sizeof(RF_ComponentLabel_t));
906 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
907 return (retcode);
908
909 case RAIDFRAME_SET_COMPONENT_LABEL:
910 component_label = (RF_ComponentLabel_t *) data;
911
912 /* XXX check the label for valid stuff... */
913 /* Note that some things *should not* get modified --
914 the user should be re-initing the labels instead of
915 trying to patch things.
916 */
917
918 printf("Got component label:\n");
919 printf("Version: %d\n",component_label->version);
920 printf("Serial Number: %d\n",component_label->serial_number);
921 printf("Mod counter: %d\n",component_label->mod_counter);
922 printf("Row: %d\n", component_label->row);
923 printf("Column: %d\n", component_label->column);
924 printf("Num Rows: %d\n", component_label->num_rows);
925 printf("Num Columns: %d\n", component_label->num_columns);
926 printf("Clean: %d\n", component_label->clean);
927 printf("Status: %d\n", component_label->status);
928
929 row = component_label->row;
930 column = component_label->column;
931
932 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
933 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
934 return(EINVAL);
935 }
936
937 /* XXX this isn't allowed to do anything for now :-) */
938 #if 0
939 raidwrite_component_label(
940 raidPtrs[unit]->Disks[row][column].dev,
941 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
942 component_label );
943 #endif
944 return (0);
945
946 case RAIDFRAME_INIT_LABELS:
947 component_label = (RF_ComponentLabel_t *) data;
948 /*
949 we only want the serial number from
950 the above. We get all the rest of the information
951 from the config that was used to create this RAID
952 set.
953 */
954
955 raidPtrs[unit]->serial_number = component_label->serial_number;
956 /* current version number */
957 ci_label.version = RF_COMPONENT_LABEL_VERSION;
958 ci_label.serial_number = component_label->serial_number;
959 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
960 ci_label.num_rows = raidPtrs[unit]->numRow;
961 ci_label.num_columns = raidPtrs[unit]->numCol;
962 ci_label.clean = RF_RAID_DIRTY; /* not clean */
963 ci_label.status = rf_ds_optimal; /* "It's good!" */
964
965 for(row=0;row<raidPtrs[unit]->numRow;row++) {
966 ci_label.row = row;
967 for(column=0;column<raidPtrs[unit]->numCol;column++) {
968 ci_label.column = column;
969 raidwrite_component_label(
970 raidPtrs[unit]->Disks[row][column].dev,
971 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
972 &ci_label );
973 }
974 }
975
976 return (retcode);
977
978 /* initialize all parity */
979 case RAIDFRAME_REWRITEPARITY:
980
981 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
982 /* Parity for RAID 0 is trivially correct */
983 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
984 return(0);
985 }
986
987 /* borrow the thread of the requesting process */
988
989 s = splbio();
990 retcode = rf_RewriteParity(raidPtrs[unit]);
991 splx(s);
992 /* return I/O Error if the parity rewrite fails */
993
994 if (retcode) {
995 retcode = EIO;
996 } else {
997 /* set the clean bit! If we shutdown correctly,
998 the clean bit on each component label will get
999 set */
1000 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1001 }
1002 return (retcode);
1003
1004
1005 case RAIDFRAME_ADD_HOT_SPARE:
1006 sparePtr = (RF_SingleComponent_t *) data;
1007 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1008 printf("Adding spare\n");
1009 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1010 return(retcode);
1011
1012 case RAIDFRAME_REMOVE_HOT_SPARE:
1013 return(retcode);
1014
1015 case RAIDFRAME_REBUILD_IN_PLACE:
1016
1017 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1018 /* Can't do this on a RAID 0!! */
1019 return(EINVAL);
1020 }
1021
1022 componentPtr = (RF_SingleComponent_t *) data;
1023 memcpy( &component, componentPtr,
1024 sizeof(RF_SingleComponent_t));
1025 row = component.row;
1026 column = component.column;
1027 printf("Rebuild: %d %d\n",row, column);
1028 if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
1029 (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1030 return(EINVAL);
1031 }
1032 printf("Attempting a rebuild in place\n");
1033 s = splbio();
1034 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1035 splx(s);
1036 return(retcode);
1037
1038 /* issue a test-unit-ready through raidframe to the indicated
1039 * device */
1040 #if 0 /* XXX not supported yet (ever?) */
1041 case RAIDFRAME_TUR:
1042 /* debug only */
1043 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1044 return (retcode);
1045 #endif
1046 case RAIDFRAME_GET_INFO:
1047 {
1048 RF_Raid_t *raid = raidPtrs[unit];
1049 RF_DeviceConfig_t *cfg, **ucfgp;
1050 int i, j, d;
1051
1052 if (!raid->valid)
1053 return (ENODEV);
1054 ucfgp = (RF_DeviceConfig_t **) data;
1055 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1056 (RF_DeviceConfig_t *));
1057 if (cfg == NULL)
1058 return (ENOMEM);
1059 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1060 cfg->rows = raid->numRow;
1061 cfg->cols = raid->numCol;
1062 cfg->ndevs = raid->numRow * raid->numCol;
1063 if (cfg->ndevs >= RF_MAX_DISKS) {
1064 cfg->ndevs = 0;
1065 return (ENOMEM);
1066 }
1067 cfg->nspares = raid->numSpare;
1068 if (cfg->nspares >= RF_MAX_DISKS) {
1069 cfg->nspares = 0;
1070 return (ENOMEM);
1071 }
1072 cfg->maxqdepth = raid->maxQueueDepth;
1073 d = 0;
1074 for (i = 0; i < cfg->rows; i++) {
1075 for (j = 0; j < cfg->cols; j++) {
1076 cfg->devs[d] = raid->Disks[i][j];
1077 d++;
1078 }
1079 }
1080 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1081 cfg->spares[i] = raid->Disks[0][j];
1082 }
1083 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1084 sizeof(RF_DeviceConfig_t));
1085 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1086
1087 return (retcode);
1088 }
1089 break;
1090 case RAIDFRAME_CHECK_PARITY:
1091 *(int *) data = raidPtrs[unit]->parity_good;
1092 return (0);
1093 case RAIDFRAME_RESET_ACCTOTALS:
1094 {
1095 RF_Raid_t *raid = raidPtrs[unit];
1096
1097 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1098 return (0);
1099 }
1100 break;
1101
1102 case RAIDFRAME_GET_ACCTOTALS:
1103 {
1104 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1105 RF_Raid_t *raid = raidPtrs[unit];
1106
1107 *totals = raid->acc_totals;
1108 return (0);
1109 }
1110 break;
1111
1112 case RAIDFRAME_KEEP_ACCTOTALS:
1113 {
1114 RF_Raid_t *raid = raidPtrs[unit];
1115 int *keep = (int *) data;
1116
1117 raid->keep_acc_totals = *keep;
1118 return (0);
1119 }
1120 break;
1121
1122 case RAIDFRAME_GET_SIZE:
1123 *(int *) data = raidPtrs[unit]->totalSectors;
1124 return (0);
1125
1126 #define RAIDFRAME_RECON 1
1127 /* XXX The above should probably be set somewhere else!! GO */
1128 #if RAIDFRAME_RECON > 0
1129
1130 /* fail a disk & optionally start reconstruction */
1131 case RAIDFRAME_FAIL_DISK:
1132
1133 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1134 /* Can't do this on a RAID 0!! */
1135 return(EINVAL);
1136 }
1137
1138 rr = (struct rf_recon_req *) data;
1139
1140 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1141 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1142 return (EINVAL);
1143
1144 printf("raid%d: Failing the disk: row: %d col: %d\n",
1145 unit, rr->row, rr->col);
1146
1147 /* make a copy of the recon request so that we don't rely on
1148 * the user's buffer */
1149 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1150 bcopy(rr, rrcopy, sizeof(*rr));
1151 rrcopy->raidPtr = (void *) raidPtrs[unit];
1152
1153 LOCK_RECON_Q_MUTEX();
1154 rrcopy->next = recon_queue;
1155 recon_queue = rrcopy;
1156 wakeup(&recon_queue);
1157 UNLOCK_RECON_Q_MUTEX();
1158
1159 return (0);
1160
1161 /* invoke a copyback operation after recon on whatever disk
1162 * needs it, if any */
1163 case RAIDFRAME_COPYBACK:
1164
1165 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1166 /* This makes no sense on a RAID 0!! */
1167 return(EINVAL);
1168 }
1169
1170 /* borrow the current thread to get this done */
1171
1172 s = splbio();
1173 rf_CopybackReconstructedData(raidPtrs[unit]);
1174 splx(s);
1175 return (0);
1176
1177 /* return the percentage completion of reconstruction */
1178 case RAIDFRAME_CHECKRECON:
1179 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
1180 /* This makes no sense on a RAID 0 */
1181 return(EINVAL);
1182 }
1183
1184 row = *(int *) data;
1185 if (row < 0 || row >= raidPtrs[unit]->numRow)
1186 return (EINVAL);
1187 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1188 *(int *) data = 100;
1189 else
1190 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1191 return (0);
1192
1193 /* the sparetable daemon calls this to wait for the kernel to
1194 * need a spare table. this ioctl does not return until a
1195 * spare table is needed. XXX -- calling mpsleep here in the
1196 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1197 * -- I should either compute the spare table in the kernel,
1198 * or have a different -- XXX XXX -- interface (a different
1199 * character device) for delivering the table -- XXX */
1200 #if 0
1201 case RAIDFRAME_SPARET_WAIT:
1202 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1203 while (!rf_sparet_wait_queue)
1204 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1205 waitreq = rf_sparet_wait_queue;
1206 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1207 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1208
1209 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1210
1211 RF_Free(waitreq, sizeof(*waitreq));
1212 return (0);
1213
1214
1215 /* wakes up a process waiting on SPARET_WAIT and puts an error
1216 * code in it that will cause the dameon to exit */
1217 case RAIDFRAME_ABORT_SPARET_WAIT:
1218 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1219 waitreq->fcol = -1;
1220 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1221 waitreq->next = rf_sparet_wait_queue;
1222 rf_sparet_wait_queue = waitreq;
1223 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1224 wakeup(&rf_sparet_wait_queue);
1225 return (0);
1226
1227 /* used by the spare table daemon to deliver a spare table
1228 * into the kernel */
1229 case RAIDFRAME_SEND_SPARET:
1230
1231 /* install the spare table */
1232 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1233
1234 /* respond to the requestor. the return status of the spare
1235 * table installation is passed in the "fcol" field */
1236 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1237 waitreq->fcol = retcode;
1238 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1239 waitreq->next = rf_sparet_resp_queue;
1240 rf_sparet_resp_queue = waitreq;
1241 wakeup(&rf_sparet_resp_queue);
1242 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1243
1244 return (retcode);
1245 #endif
1246
1247
1248 #endif /* RAIDFRAME_RECON > 0 */
1249
1250 default:
1251 break; /* fall through to the os-specific code below */
1252
1253 }
1254
1255 if (!raidPtrs[unit]->valid)
1256 return (EINVAL);
1257
1258 /*
1259 * Add support for "regular" device ioctls here.
1260 */
1261
1262 switch (cmd) {
1263 case DIOCGDINFO:
1264 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1265 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1266 break;
1267
1268 case DIOCGPART:
1269 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1270 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1271 ((struct partinfo *) data)->part =
1272 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1273 break;
1274
1275 case DIOCWDINFO:
1276 db1_printf(("DIOCWDINFO\n"));
1277 case DIOCSDINFO:
1278 db1_printf(("DIOCSDINFO\n"));
1279 if ((error = raidlock(rs)) != 0)
1280 return (error);
1281
1282 rs->sc_flags |= RAIDF_LABELLING;
1283
1284 error = setdisklabel(rs->sc_dkdev.dk_label,
1285 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1286 if (error == 0) {
1287 if (cmd == DIOCWDINFO)
1288 error = writedisklabel(RAIDLABELDEV(dev),
1289 raidstrategy, rs->sc_dkdev.dk_label,
1290 rs->sc_dkdev.dk_cpulabel);
1291 }
1292 rs->sc_flags &= ~RAIDF_LABELLING;
1293
1294 raidunlock(rs);
1295
1296 if (error)
1297 return (error);
1298 break;
1299
1300 case DIOCWLABEL:
1301 db1_printf(("DIOCWLABEL\n"));
1302 if (*(int *) data != 0)
1303 rs->sc_flags |= RAIDF_WLABEL;
1304 else
1305 rs->sc_flags &= ~RAIDF_WLABEL;
1306 break;
1307
1308 case DIOCGDEFLABEL:
1309 db1_printf(("DIOCGDEFLABEL\n"));
1310 raidgetdefaultlabel(raidPtrs[unit], rs,
1311 (struct disklabel *) data);
1312 break;
1313
1314 default:
1315 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1316 }
1317 return (retcode);
1318
1319 }
1320
1321
/* raidinit -- complete the rest of the initialization for the
   RAIDframe device. */


/*
 * Set up the kernel-side state for raid unit `unit': create the per-unit
 * pool of shadow buffers (struct raidbuf), mark the softc initialized,
 * name the device, attach it to the disk subsystem, and record its size
 * (in sectors) and dev_t.
 *
 * dev     - dev_t of the raid device being configured.
 * raidPtr - configured RAIDframe descriptor; supplies totalSectors.
 * unit    - index into raid_softc[].
 *
 * Currently always returns 0; retcode is never set to anything else.
 */
static int
raidinit(dev, raidPtr, unit)
	dev_t dev;
	RF_Raid_t *raidPtr;
	int unit;
{
	int retcode;
	/* int ix; */
	/* struct raidbuf *raidbp; */
	struct raid_softc *rs;

	retcode = 0;

	rs = &raid_softc[unit];
	/* pool of raidbuf headers used to shadow component I/O requests */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;

	return (retcode);
}
1365
1366 /*
1367 * This kernel thread never exits. It is created once, and persists
1368 * until the system reboots.
1369 */
1370
1371 void
1372 rf_ReconKernelThread()
1373 {
1374 struct rf_recon_req *req;
1375 int s;
1376
1377 /* XXX not sure what spl() level we should be at here... probably
1378 * splbio() */
1379 s = splbio();
1380
1381 while (1) {
1382 /* grab the next reconstruction request from the queue */
1383 LOCK_RECON_Q_MUTEX();
1384 while (!recon_queue) {
1385 UNLOCK_RECON_Q_MUTEX();
1386 tsleep(&recon_queue, PRIBIO,
1387 "raidframe recon", 0);
1388 LOCK_RECON_Q_MUTEX();
1389 }
1390 req = recon_queue;
1391 recon_queue = recon_queue->next;
1392 UNLOCK_RECON_Q_MUTEX();
1393
1394 /*
1395 * If flags specifies that we should start recon, this call
1396 * will not return until reconstruction completes, fails,
1397 * or is aborted.
1398 */
1399 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
1400 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
1401
1402 RF_Free(req, sizeof(*req));
1403 }
1404 }
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 */
/*
 * Post `req' on rf_sparet_wait_queue for the user-level sparetable
 * daemon (which blocks in RAIDFRAME_SPARET_WAIT), then sleep until the
 * daemon delivers a response via RAIDFRAME_SEND_SPARET.  The response's
 * fcol field carries the installation status, which is returned.
 * The response entry (a different allocation than `req') is freed here.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* enqueue the request and poke the daemon */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * NOTE(review): the original mpsleep() (see #if 0 below) dropped
	 * rf_sparet_wait_mutex while asleep; tsleep() does not, so we
	 * sleep holding the mutex.  Confirm the mutex is effectively a
	 * no-op here (uniprocessor spl protection), otherwise this can
	 * deadlock with the ioctl path that takes the same mutex.
	 */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	/* pop the response entry; its fcol holds the return status */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;		/* array to access */
	struct buf *bp;			/* kernel buf describing the transfer */
	RF_RaidAccessFlags_t flags;	/* extra DAG flags, or'd in below */
	void (*cbFunc) (struct buf *);	/* completion callback */
	void *cbArg;			/* argument passed to cbFunc */
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
			pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/*
	 * Bounds check: reject a transfer that runs past the end of the
	 * array.  pb accounts for a trailing partial sector; the
	 * (sum < ...) comparisons catch wrap-around of the unsigned
	 * sector arithmetic.
	 */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		/* out of range: fail the buf ourselves and return */
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* transfers must be a whole number of sectors */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));


	/* Put a throttle on the number of requests we handle simultanously */

	RF_LOCK_MUTEX(raidPtr->mutex);

	/* sleep until an "opening" is available; the completion path
	 * bumps raidPtr->openings and wakes us */
	while(raidPtr->openings <= 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		(void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	raidPtr->openings--;

	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE. always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
		bp->b_data, (int) bp->b_resid));
#endif

	return (retcode);
}
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

/*
 * Dispatch one RAIDframe disk-queue request to the underlying component:
 * allocate a shadow raidbuf, initialize it via InitBP() to target the
 * component's vnode/device at the requested sector range, and hand it to
 * VOP_STRATEGY().  KernelWakeupFunc() runs at biodone() time to complete
 * the request.  NOP requests just bump the outstanding count and complete
 * immediately.  Always returns 0.
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* shadow buf used for the actual component-level I/O */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		/* NOTE(review): the doubled parens are harmless (the inner
		 * parens just group the string), but this was presumably
		 * meant to be db1_printf -- confirm */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* aim the shadow buf at the component's device and offset */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump v_numoutput before VOP_STRATEGY */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.
 */
/*
 * Runs from biodone() when a component I/O issued by
 * rf_DispatchKernelIO() finishes: propagates any component error to the
 * original buf, records trace timing, marks the component failed on
 * error, returns the shadow raidbuf to the pool, and notifies the
 * RAIDframe disk-queue layer and the request's completion function.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;	/* the original (upper-level) buf */
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate a component-level error to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* account this physical I/O in the access trace, if any */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the shadow buf to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* tell the disk-queue layer and the original requester we're done */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1769
1770
1771
1772 /*
1773 * initialize a buf structure for doing an I/O in the kernel.
1774 */
1775 static void
1776 InitBP(
1777 struct buf * bp,
1778 struct vnode * b_vp,
1779 unsigned rw_flag,
1780 dev_t dev,
1781 RF_SectorNum_t startSect,
1782 RF_SectorCount_t numSect,
1783 caddr_t buf,
1784 void (*cbFunc) (struct buf *),
1785 void *cbArg,
1786 int logBytesPerSector,
1787 struct proc * b_proc)
1788 {
1789 /* bp->b_flags = B_PHYS | rw_flag; */
1790 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1791 bp->b_bcount = numSect << logBytesPerSector;
1792 bp->b_bufsize = bp->b_bcount;
1793 bp->b_error = 0;
1794 bp->b_dev = dev;
1795 db1_printf(("bp->b_dev is %d\n", dev));
1796 bp->b_un.b_addr = buf;
1797 #if 0
1798 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1799 #endif
1800
1801 bp->b_blkno = startSect;
1802 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1803 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1804 if (bp->b_bcount == 0) {
1805 panic("bp->b_bcount is zero in InitBP!!\n");
1806 }
1807 bp->b_proc = b_proc;
1808 bp->b_iodone = cbFunc;
1809 bp->b_vp = b_vp;
1810
1811 }
/* Extras... */

#if 0
/*
 * Dead code: a stub version of rf_GetSpareTableFromDaemon(), superseded
 * by the live implementation earlier in this file.  Kept under #if 0 for
 * reference only.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1826
1827 static void
1828 raidgetdefaultlabel(raidPtr, rs, lp)
1829 RF_Raid_t *raidPtr;
1830 struct raid_softc *rs;
1831 struct disklabel *lp;
1832 {
1833 db1_printf(("Building a default label...\n"));
1834 bzero(lp, sizeof(*lp));
1835
1836 /* fabricate a label... */
1837 lp->d_secperunit = raidPtr->totalSectors;
1838 lp->d_secsize = raidPtr->bytesPerSector;
1839 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1840 lp->d_ntracks = 1;
1841 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1842 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1843
1844 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1845 lp->d_type = DTYPE_RAID;
1846 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1847 lp->d_rpm = 3600;
1848 lp->d_interleave = 1;
1849 lp->d_flags = 0;
1850
1851 lp->d_partitions[RAW_PART].p_offset = 0;
1852 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1853 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1854 lp->d_npartitions = RAW_PART + 1;
1855
1856 lp->d_magic = DISKMAGIC;
1857 lp->d_magic2 = DISKMAGIC;
1858 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1859
1860 }
1861 /*
1862 * Read the disklabel from the raid device. If one is not present, fake one
1863 * up.
1864 */
1865 static void
1866 raidgetdisklabel(dev)
1867 dev_t dev;
1868 {
1869 int unit = raidunit(dev);
1870 struct raid_softc *rs = &raid_softc[unit];
1871 char *errstring;
1872 struct disklabel *lp = rs->sc_dkdev.dk_label;
1873 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1874 RF_Raid_t *raidPtr;
1875
1876 db1_printf(("Getting the disklabel...\n"));
1877
1878 bzero(clp, sizeof(*clp));
1879
1880 raidPtr = raidPtrs[unit];
1881
1882 raidgetdefaultlabel(raidPtr, rs, lp);
1883
1884 /*
1885 * Call the generic disklabel extraction routine.
1886 */
1887 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1888 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1889 if (errstring)
1890 raidmakedisklabel(rs);
1891 else {
1892 int i;
1893 struct partition *pp;
1894
1895 /*
1896 * Sanity check whether the found disklabel is valid.
1897 *
1898 * This is necessary since total size of the raid device
1899 * may vary when an interleave is changed even though exactly
1900 * same componets are used, and old disklabel may used
1901 * if that is found.
1902 */
1903 if (lp->d_secperunit != rs->sc_size)
1904 printf("WARNING: %s: "
1905 "total sector size in disklabel (%d) != "
1906 "the size of raid (%ld)\n", rs->sc_xname,
1907 lp->d_secperunit, (long) rs->sc_size);
1908 for (i = 0; i < lp->d_npartitions; i++) {
1909 pp = &lp->d_partitions[i];
1910 if (pp->p_offset + pp->p_size > rs->sc_size)
1911 printf("WARNING: %s: end of partition `%c' "
1912 "exceeds the size of raid (%ld)\n",
1913 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1914 }
1915 }
1916
1917 }
1918 /*
1919 * Take care of things one might want to take care of in the event
1920 * that a disklabel isn't present.
1921 */
1922 static void
1923 raidmakedisklabel(rs)
1924 struct raid_softc *rs;
1925 {
1926 struct disklabel *lp = rs->sc_dkdev.dk_label;
1927 db1_printf(("Making a label..\n"));
1928
1929 /*
1930 * For historical reasons, if there's no disklabel present
1931 * the raw partition must be marked FS_BSDFFS.
1932 */
1933
1934 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1935
1936 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1937
1938 lp->d_checksum = dkcksum(lp);
1939 }
1940 /*
1941 * Lookup the provided name in the filesystem. If the file exists,
1942 * is a valid block device, and isn't being used by anyone else,
1943 * set *vpp to the file's vnode.
1944 * You'll find the original of this in ccd.c
1945 */
1946 int
1947 raidlookup(path, p, vpp)
1948 char *path;
1949 struct proc *p;
1950 struct vnode **vpp; /* result */
1951 {
1952 struct nameidata nd;
1953 struct vnode *vp;
1954 struct vattr va;
1955 int error;
1956
1957 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1958 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1959 #ifdef DEBUG
1960 printf("RAIDframe: vn_open returned %d\n", error);
1961 #endif
1962 return (error);
1963 }
1964 vp = nd.ni_vp;
1965 if (vp->v_usecount > 1) {
1966 VOP_UNLOCK(vp, 0);
1967 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1968 return (EBUSY);
1969 }
1970 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1971 VOP_UNLOCK(vp, 0);
1972 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1973 return (error);
1974 }
1975 /* XXX: eventually we should handle VREG, too. */
1976 if (va.va_type != VBLK) {
1977 VOP_UNLOCK(vp, 0);
1978 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1979 return (ENOTBLK);
1980 }
1981 VOP_UNLOCK(vp, 0);
1982 *vpp = vp;
1983 return (0);
1984 }
1985 /*
1986 * Wait interruptibly for an exclusive lock.
1987 *
1988 * XXX
1989 * Several drivers do this; it should be abstracted and made MP-safe.
1990 * (Hmm... where have we seen this warning before :-> GO )
1991 */
1992 static int
1993 raidlock(rs)
1994 struct raid_softc *rs;
1995 {
1996 int error;
1997
1998 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1999 rs->sc_flags |= RAIDF_WANTED;
2000 if ((error =
2001 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2002 return (error);
2003 }
2004 rs->sc_flags |= RAIDF_LOCKED;
2005 return (0);
2006 }
2007 /*
2008 * Unlock and wake up any waiters.
2009 */
2010 static void
2011 raidunlock(rs)
2012 struct raid_softc *rs;
2013 {
2014
2015 rs->sc_flags &= ~RAIDF_LOCKED;
2016 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2017 rs->sc_flags &= ~RAIDF_WANTED;
2018 wakeup(rs);
2019 }
2020 }
2021
2022
2023 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2024 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2025
2026 int
2027 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2028 {
2029 RF_ComponentLabel_t component_label;
2030 raidread_component_label(dev, b_vp, &component_label);
2031 component_label.mod_counter = mod_counter;
2032 component_label.clean = RF_RAID_CLEAN;
2033 raidwrite_component_label(dev, b_vp, &component_label);
2034 return(0);
2035 }
2036
2037
2038 int
2039 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2040 {
2041 RF_ComponentLabel_t component_label;
2042 raidread_component_label(dev, b_vp, &component_label);
2043 component_label.mod_counter = mod_counter;
2044 component_label.clean = RF_RAID_DIRTY;
2045 raidwrite_component_label(dev, b_vp, &component_label);
2046 return(0);
2047 }
2048
/* ARGSUSED */
/*
 * Read the RAIDframe component label (RF_COMPONENT_INFO_SIZE bytes at
 * byte offset RF_COMPONENT_INFO_OFFSET) directly from the component's
 * block device into *component_label.  Returns 0 or the biowait() error.
 * b_vp is currently unused (see ARGSUSED).
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is conventionally in bytes; this sets it
	 * in DEV_BSIZE units -- confirm what the strategy routine expects */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read straight to the component's block device driver */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* make sure this temporary buffer is not cached or reused */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
/* ARGSUSED */
/*
 * Write *component_label to the component's label area (a zero-padded
 * RF_COMPONENT_INFO_SIZE-byte block at byte offset
 * RF_COMPONENT_INFO_OFFSET), going straight to the block device driver.
 * Returns 0 or the biowait() error.  b_vp is currently unused.
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is conventionally in bytes; this sets it
	 * in DEV_BSIZE units -- confirm what the strategy routine expects */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-pad the block so the on-disk area past the label is clean */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* make sure this temporary buffer is not cached or reused */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2133
2134 void
2135 rf_markalldirty( raidPtr )
2136 RF_Raid_t *raidPtr;
2137 {
2138 RF_ComponentLabel_t c_label;
2139 int r,c;
2140
2141 raidPtr->mod_counter++;
2142 for (r = 0; r < raidPtr->numRow; r++) {
2143 for (c = 0; c < raidPtr->numCol; c++) {
2144 if (raidPtr->Disks[r][c].status != rf_ds_failed) {
2145 raidread_component_label(
2146 raidPtr->Disks[r][c].dev,
2147 raidPtr->raid_cinfo[r][c].ci_vp,
2148 &c_label);
2149 if (c_label.status == rf_ds_spared) {
2150 /* XXX do something special...
2151 but whatever you do, don't
2152 try to access it!! */
2153 } else {
2154 #if 0
2155 c_label.status =
2156 raidPtr->Disks[r][c].status;
2157 raidwrite_component_label(
2158 raidPtr->Disks[r][c].dev,
2159 raidPtr->raid_cinfo[r][c].ci_vp,
2160 &c_label);
2161 #endif
2162 raidmarkdirty(
2163 raidPtr->Disks[r][c].dev,
2164 raidPtr->raid_cinfo[r][c].ci_vp,
2165 raidPtr->mod_counter);
2166 }
2167 }
2168 }
2169 }
2170 /* printf("Component labels marked dirty.\n"); */
2171 #if 0
2172 for( c = 0; c < raidPtr->numSpare ; c++) {
2173 sparecol = raidPtr->numCol + c;
2174 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2175 /*
2176
2177 XXX this is where we get fancy and map this spare
2178 into it's correct spot in the array.
2179
2180 */
2181 /*
2182
2183 we claim this disk is "optimal" if it's
2184 rf_ds_used_spare, as that means it should be
2185 directly substitutable for the disk it replaced.
2186 We note that too...
2187
2188 */
2189
2190 for(i=0;i<raidPtr->numRow;i++) {
2191 for(j=0;j<raidPtr->numCol;j++) {
2192 if ((raidPtr->Disks[i][j].spareRow ==
2193 r) &&
2194 (raidPtr->Disks[i][j].spareCol ==
2195 sparecol)) {
2196 srow = r;
2197 scol = sparecol;
2198 break;
2199 }
2200 }
2201 }
2202
2203 raidread_component_label(
2204 raidPtr->Disks[r][sparecol].dev,
2205 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2206 &c_label);
2207 /* make sure status is noted */
2208 c_label.version = RF_COMPONENT_LABEL_VERSION;
2209 c_label.mod_counter = raidPtr->mod_counter;
2210 c_label.serial_number = raidPtr->serial_number;
2211 c_label.row = srow;
2212 c_label.column = scol;
2213 c_label.num_rows = raidPtr->numRow;
2214 c_label.num_columns = raidPtr->numCol;
2215 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2216 c_label.status = rf_ds_optimal;
2217 raidwrite_component_label(
2218 raidPtr->Disks[r][sparecol].dev,
2219 raidPtr->raid_cinfo[r][sparecol].ci_vp,
2220 &c_label);
2221 raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2222 raidPtr->raid_cinfo[r][sparecol].ci_vp);
2223 }
2224 }
2225
2226 #endif
2227 }
2228
2229
2230 void
2231 rf_update_component_labels( raidPtr )
2232 RF_Raid_t *raidPtr;
2233 {
2234 RF_ComponentLabel_t c_label;
2235 int sparecol;
2236 int r,c;
2237 int i,j;
2238 int srow, scol;
2239
2240 srow = -1;
2241 scol = -1;
2242
2243 /* XXX should do extra checks to make sure things really are clean,
2244 rather than blindly setting the clean bit... */
2245
2246 raidPtr->mod_counter++;
2247
2248 for (r = 0; r < raidPtr->numRow; r++) {
2249 for (c = 0; c < raidPtr->numCol; c++) {
2250 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2251 raidread_component_label(
2252 raidPtr->Disks[r][c].dev,
2253 raidPtr->raid_cinfo[r][c].ci_vp,
2254 &c_label);
2255 /* make sure status is noted */
2256 c_label.status = rf_ds_optimal;
2257 raidwrite_component_label(
2258 raidPtr->Disks[r][c].dev,
2259 raidPtr->raid_cinfo[r][c].ci_vp,
2260 &c_label);
2261 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2262 raidmarkclean(
2263 raidPtr->Disks[r][c].dev,
2264 raidPtr->raid_cinfo[r][c].ci_vp,
2265 raidPtr->mod_counter);
2266 }
2267 }
2268 /* else we don't touch it.. */
2269 #if 0
2270 else if (raidPtr->Disks[r][c].status !=
2271 rf_ds_failed) {
2272 raidread_component_label(
2273 raidPtr->Disks[r][c].dev,
2274 raidPtr->raid_cinfo[r][c].ci_vp,
2275 &c_label);
2276 /* make sure status is noted */
2277 c_label.status =
2278 raidPtr->Disks[r][c].status;
2279 raidwrite_component_label(
2280 raidPtr->Disks[r][c].dev,
2281 raidPtr->raid_cinfo[r][c].ci_vp,
2282 &c_label);
2283 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2284 raidmarkclean(
2285 raidPtr->Disks[r][c].dev,
2286 raidPtr->raid_cinfo[r][c].ci_vp,
2287 raidPtr->mod_counter);
2288 }
2289 }
2290 #endif
2291 }
2292 }
2293
2294 for( c = 0; c < raidPtr->numSpare ; c++) {
2295 sparecol = raidPtr->numCol + c;
2296 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2297 /*
2298
2299 we claim this disk is "optimal" if it's
2300 rf_ds_used_spare, as that means it should be
2301 directly substitutable for the disk it replaced.
2302 We note that too...
2303
2304 */
2305
2306 for(i=0;i<raidPtr->numRow;i++) {
2307 for(j=0;j<raidPtr->numCol;j++) {
2308 if ((raidPtr->Disks[i][j].spareRow ==
2309 0) &&
2310 (raidPtr->Disks[i][j].spareCol ==
2311 sparecol)) {
2312 srow = i;
2313 scol = j;
2314 break;
2315 }
2316 }
2317 }
2318
2319 raidread_component_label(
2320 raidPtr->Disks[0][sparecol].dev,
2321 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2322 &c_label);
2323 /* make sure status is noted */
2324 c_label.version = RF_COMPONENT_LABEL_VERSION;
2325 c_label.mod_counter = raidPtr->mod_counter;
2326 c_label.serial_number = raidPtr->serial_number;
2327 c_label.row = srow;
2328 c_label.column = scol;
2329 c_label.num_rows = raidPtr->numRow;
2330 c_label.num_columns = raidPtr->numCol;
2331 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2332 c_label.status = rf_ds_optimal;
2333 raidwrite_component_label(
2334 raidPtr->Disks[0][sparecol].dev,
2335 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2336 &c_label);
2337 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2338 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2339 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2340 raidPtr->mod_counter);
2341 }
2342 }
2343 }
2344 /* printf("Component labels updated\n"); */
2345 }
2346