rf_netbsdkintf.c revision 1.16 1 /* $NetBSD: rf_netbsdkintf.c,v 1.16 1999/03/27 01:26:37 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
154 int rf_kdebug_level = 0;
155
156 #define RFK_BOOT_NONE 0
157 #define RFK_BOOT_GOOD 1
158 #define RFK_BOOT_BAD 2
159 static int rf_kbooted = RFK_BOOT_NONE;
160
161 #ifdef DEBUG
162 #define db0_printf(a) printf a
163 #define db_printf(a) if (rf_kdebug_level > 0) printf a
164 #define db1_printf(a) if (rf_kdebug_level > 0) printf a
165 #define db2_printf(a) if (rf_kdebug_level > 1) printf a
166 #define db3_printf(a) if (rf_kdebug_level > 2) printf a
167 #define db4_printf(a) if (rf_kdebug_level > 3) printf a
168 #define db5_printf(a) if (rf_kdebug_level > 4) printf a
169 #else /* DEBUG */
170 #define db0_printf(a) printf a
171 #define db1_printf(a) { }
172 #define db2_printf(a) { }
173 #define db3_printf(a) { }
174 #define db4_printf(a) { }
175 #define db5_printf(a) { }
176 #endif /* DEBUG */
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
181
182 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
183 * spare table */
184 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
185 * installation process */
186
187 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
188 * reconstruction
189 * requests */
190
191
192 decl_simple_lock_data(, recon_queue_mutex)
193 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
194 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
196 /* prototypes */
197 static void KernelWakeupFunc(struct buf * bp);
198 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
199 dev_t dev, RF_SectorNum_t startSect,
200 RF_SectorCount_t numSect, caddr_t buf,
201 void (*cbFunc) (struct buf *), void *cbArg,
202 int logBytesPerSector, struct proc * b_proc);
203
204 #define Dprintf0(s) if (rf_queueDebug) \
205 rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
206 #define Dprintf1(s,a) if (rf_queueDebug) \
207 rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
208 #define Dprintf2(s,a,b) if (rf_queueDebug) \
209 rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
210 #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
211 rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
212
213 int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
214 int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
215
216 void raidattach __P((int));
217 int raidsize __P((dev_t));
218
219 void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
220 void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
221 static int raidinit __P((dev_t, RF_Raid_t *, int));
222
223 int raidopen __P((dev_t, int, int, struct proc *));
224 int raidclose __P((dev_t, int, int, struct proc *));
225 int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
226 int raidwrite __P((dev_t, struct uio *, int));
227 int raidread __P((dev_t, struct uio *, int));
228 void raidstrategy __P((struct buf *));
229 int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
230
231 int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
232 int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
233 void rf_update_component_labels( RF_Raid_t *);
234 /*
235 * Pilfered from ccd.c
236 */
237
/*
 * Per-component I/O descriptor (borrowed from the ccd driver).  These are
 * allocated from the per-unit sc_cbufpool via RAIDGETBUF/RAIDPUTBUF.
 * NOTE(review): code apparently casts between struct buf * and
 * struct raidbuf *, hence the "MUST BE FIRST" constraint below — confirm
 * at the allocation/completion sites.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int rf_flags;		/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
244
245
246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
247 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
249 /* XXX Not sure if the following should be replacing the raidPtrs above,
250 or if it should be used in conjunction with that... */
251
/*
 * Per-unit driver state ("softc"); one entry per configured RAID device,
 * allocated as an array in raidattach().  Flag values (RAIDF_*) are
 * defined immediately below.
 */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_*) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
261 /* sc_flags */
262 #define RAIDF_INITED 0x01 /* unit has been initialized */
263 #define RAIDF_WLABEL 0x02 /* label area is writable */
264 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
265 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
266 #define RAIDF_LOCKED 0x80 /* unit is locked */
267
268 #define raidunit(x) DISKUNIT(x)
269 static int numraid = 0;
270
271 #define RAIDLABELDEV(dev) \
272 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
273
274 /* declared here, and made public, for the benefit of KVM stuff.. */
275 struct raid_softc *raid_softc;
276
277 static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
278 struct disklabel *));
279 static void raidgetdisklabel __P((dev_t));
280 static void raidmakedisklabel __P((struct raid_softc *));
281
282 static int raidlock __P((struct raid_softc *));
283 static void raidunlock __P((struct raid_softc *));
284 int raidlookup __P((char *, struct proc * p, struct vnode **));
285
286 static void rf_markalldirty __P((RF_Raid_t *));
287
288 void
289 raidattach(num)
290 int num;
291 {
292 int raidID;
293 int i, rc;
294
295 #ifdef DEBUG
296 printf("raidattach: Asked for %d units\n", num);
297 #endif
298
299 if (num <= 0) {
300 #ifdef DIAGNOSTIC
301 panic("raidattach: count <= 0");
302 #endif
303 return;
304 }
305 /* This is where all the initialization stuff gets done. */
306
307 /* Make some space for requested number of units... */
308
309 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
310 if (raidPtrs == NULL) {
311 panic("raidPtrs is NULL!!\n");
312 }
313
314 rc = rf_mutex_init(&rf_sparet_wait_mutex);
315 if (rc) {
316 RF_PANIC();
317 }
318
319 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
320 recon_queue = NULL;
321
322 for (i = 0; i < numraid; i++)
323 raidPtrs[i] = NULL;
324 rc = rf_BootRaidframe();
325 if (rc == 0)
326 printf("Kernelized RAIDframe activated\n");
327 else
328 panic("Serious error booting RAID!!\n");
329
330 rf_kbooted = RFK_BOOT_GOOD;
331
332 /* put together some datastructures like the CCD device does.. This
333 * lets us lock the device and what-not when it gets opened. */
334
335 raid_softc = (struct raid_softc *)
336 malloc(num * sizeof(struct raid_softc),
337 M_RAIDFRAME, M_NOWAIT);
338 if (raid_softc == NULL) {
339 printf("WARNING: no memory for RAIDframe driver\n");
340 return;
341 }
342 numraid = num;
343 bzero(raid_softc, num * sizeof(struct raid_softc));
344
345 for (raidID = 0; raidID < num; raidID++) {
346 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
347 (RF_Raid_t *));
348 if (raidPtrs[raidID] == NULL) {
349 printf("raidPtrs[%d] is NULL\n", raidID);
350 }
351 }
352 }
353
354
355 int
356 raidsize(dev)
357 dev_t dev;
358 {
359 struct raid_softc *rs;
360 struct disklabel *lp;
361 int part, unit, omask, size;
362
363 unit = raidunit(dev);
364 if (unit >= numraid)
365 return (-1);
366 rs = &raid_softc[unit];
367
368 if ((rs->sc_flags & RAIDF_INITED) == 0)
369 return (-1);
370
371 part = DISKPART(dev);
372 omask = rs->sc_dkdev.dk_openmask & (1 << part);
373 lp = rs->sc_dkdev.dk_label;
374
375 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
376 return (-1);
377
378 if (lp->d_partitions[part].p_fstype != FS_SWAP)
379 size = -1;
380 else
381 size = lp->d_partitions[part].p_size *
382 (lp->d_secsize / DEV_BSIZE);
383
384 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
385 return (-1);
386
387 return (size);
388
389 }
390
391 int
392 raiddump(dev, blkno, va, size)
393 dev_t dev;
394 daddr_t blkno;
395 caddr_t va;
396 size_t size;
397 {
398 /* Not implemented. */
399 return ENXIO;
400 }
/* ARGSUSED */
/*
 * Open the raid device: validate the unit and partition, take the
 * per-unit lock, (re)read the disklabel on the first open of a
 * configured unit, and record the open in the char/block open masks.
 * The very first open of a configured unit also marks all component
 * labels dirty, so an unclean shutdown can be detected later.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against close/unconfigure of this unit. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: refresh the in-core label. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		/* The RAIDF_INITED test short-circuits first, so lp's
		   fields are only consulted for an initialized unit. */
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * Close the raid device: clear this partition's bit in the appropriate
 * open mask and, when the last partition of a configured unit closes,
 * update the component labels on disk (marking the set clean) so a later
 * boot can tell the set was shut down in an orderly way.
 */
int
raidclose(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against open/unconfigure of this unit. */
	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */
		rf_update_component_labels( raidPtrs[unit] );
	}

	raidunlock(rs);
	return (0);

}
527
528 void
529 raidstrategy(bp)
530 register struct buf *bp;
531 {
532 register int s;
533
534 unsigned int raidID = raidunit(bp->b_dev);
535 RF_Raid_t *raidPtr;
536 struct raid_softc *rs = &raid_softc[raidID];
537 struct disklabel *lp;
538 int wlabel;
539
540 #if 0
541 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
542 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
543 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
544 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
545 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
546
547 if (bp->b_flags & B_READ)
548 db1_printf(("READ\n"));
549 else
550 db1_printf(("WRITE\n"));
551 #endif
552 if (rf_kbooted != RFK_BOOT_GOOD)
553 return;
554 if (raidID >= numraid || !raidPtrs[raidID]) {
555 bp->b_error = ENODEV;
556 bp->b_flags |= B_ERROR;
557 bp->b_resid = bp->b_bcount;
558 biodone(bp);
559 return;
560 }
561 raidPtr = raidPtrs[raidID];
562 if (!raidPtr->valid) {
563 bp->b_error = ENODEV;
564 bp->b_flags |= B_ERROR;
565 bp->b_resid = bp->b_bcount;
566 biodone(bp);
567 return;
568 }
569 if (bp->b_bcount == 0) {
570 db1_printf(("b_bcount is zero..\n"));
571 biodone(bp);
572 return;
573 }
574 lp = rs->sc_dkdev.dk_label;
575
576 /*
577 * Do bounds checking and adjust transfer. If there's an
578 * error, the bounds check will flag that for us.
579 */
580
581 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
582 if (DISKPART(bp->b_dev) != RAW_PART)
583 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
584 db1_printf(("Bounds check failed!!:%d %d\n",
585 (int) bp->b_blkno, (int) wlabel));
586 biodone(bp);
587 return;
588 }
589 s = splbio(); /* XXX Needed? */
590 db1_printf(("Beginning strategy...\n"));
591
592 bp->b_resid = 0;
593 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
594 NULL, NULL, NULL);
595 if (bp->b_error) {
596 bp->b_flags |= B_ERROR;
597 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
598 bp->b_error));
599 }
600 splx(s);
601 #if 0
602 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
603 bp, bp->b_data,
604 (int) bp->b_bcount, (int) bp->b_resid));
605 #endif
606 }
607 /* ARGSUSED */
608 int
609 raidread(dev, uio, flags)
610 dev_t dev;
611 struct uio *uio;
612 int flags;
613 {
614 int unit = raidunit(dev);
615 struct raid_softc *rs;
616 int part;
617
618 if (unit >= numraid)
619 return (ENXIO);
620 rs = &raid_softc[unit];
621
622 if ((rs->sc_flags & RAIDF_INITED) == 0)
623 return (ENXIO);
624 part = DISKPART(dev);
625
626 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
627
628 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
629
630 }
631 /* ARGSUSED */
632 int
633 raidwrite(dev, uio, flags)
634 dev_t dev;
635 struct uio *uio;
636 int flags;
637 {
638 int unit = raidunit(dev);
639 struct raid_softc *rs;
640
641 if (unit >= numraid)
642 return (ENXIO);
643 rs = &raid_softc[unit];
644
645 if ((rs->sc_flags & RAIDF_INITED) == 0)
646 return (ENXIO);
647 db1_printf(("raidwrite\n"));
648 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
649
650 }
651
652 int
653 raidioctl(dev, cmd, data, flag, p)
654 dev_t dev;
655 u_long cmd;
656 caddr_t data;
657 int flag;
658 struct proc *p;
659 {
660 int unit = raidunit(dev);
661 int error = 0;
662 int part, pmask;
663 struct raid_softc *rs;
664 #if 0
665 int r, c;
666 #endif
667 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
668
669 /* struct ccdbuf *cbp; */
670 /* struct raidbuf *raidbp; */
671 RF_Config_t *k_cfg, *u_cfg;
672 u_char *specific_buf;
673 int retcode = 0;
674 int row;
675 int column;
676 struct rf_recon_req *rrcopy, *rr;
677 RF_ComponentLabel_t *component_label;
678 RF_ComponentLabel_t ci_label;
679 RF_ComponentLabel_t **c_label_ptr;
680 RF_SingleComponent_t *sparePtr,*componentPtr;
681 RF_SingleComponent_t hot_spare;
682 RF_SingleComponent_t component;
683
684 if (unit >= numraid)
685 return (ENXIO);
686 rs = &raid_softc[unit];
687
688 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
689 (int) DISKPART(dev), (int) unit, (int) cmd));
690
691 /* Must be open for writes for these commands... */
692 switch (cmd) {
693 case DIOCSDINFO:
694 case DIOCWDINFO:
695 case DIOCWLABEL:
696 if ((flag & FWRITE) == 0)
697 return (EBADF);
698 }
699
700 /* Must be initialized for these... */
701 switch (cmd) {
702 case DIOCGDINFO:
703 case DIOCSDINFO:
704 case DIOCWDINFO:
705 case DIOCGPART:
706 case DIOCWLABEL:
707 case DIOCGDEFLABEL:
708 case RAIDFRAME_SHUTDOWN:
709 case RAIDFRAME_REWRITEPARITY:
710 case RAIDFRAME_GET_INFO:
711 case RAIDFRAME_RESET_ACCTOTALS:
712 case RAIDFRAME_GET_ACCTOTALS:
713 case RAIDFRAME_KEEP_ACCTOTALS:
714 case RAIDFRAME_GET_SIZE:
715 case RAIDFRAME_FAIL_DISK:
716 case RAIDFRAME_COPYBACK:
717 case RAIDFRAME_CHECKRECON:
718 case RAIDFRAME_GET_COMPONENT_LABEL:
719 case RAIDFRAME_SET_COMPONENT_LABEL:
720 case RAIDFRAME_ADD_HOT_SPARE:
721 case RAIDFRAME_REMOVE_HOT_SPARE:
722 case RAIDFRAME_INIT_LABELS:
723 case RAIDFRAME_REBUILD_IN_PLACE:
724 if ((rs->sc_flags & RAIDF_INITED) == 0)
725 return (ENXIO);
726 }
727
728 switch (cmd) {
729
730
731 /* configure the system */
732 case RAIDFRAME_CONFIGURE:
733
734 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
735 /* copy-in the configuration information */
736 /* data points to a pointer to the configuration structure */
737 u_cfg = *((RF_Config_t **) data);
738 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
739 if (k_cfg == NULL) {
740 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
741 return (ENOMEM);
742 }
743 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
744 sizeof(RF_Config_t));
745 if (retcode) {
746 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
747 retcode));
748 return (retcode);
749 }
750 /* allocate a buffer for the layout-specific data, and copy it
751 * in */
752 if (k_cfg->layoutSpecificSize) {
753 if (k_cfg->layoutSpecificSize > 10000) {
754 /* sanity check */
755 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
756 return (EINVAL);
757 }
758 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
759 (u_char *));
760 if (specific_buf == NULL) {
761 RF_Free(k_cfg, sizeof(RF_Config_t));
762 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
763 return (ENOMEM);
764 }
765 retcode = copyin(k_cfg->layoutSpecific,
766 (caddr_t) specific_buf,
767 k_cfg->layoutSpecificSize);
768 if (retcode) {
769 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
770 retcode));
771 return (retcode);
772 }
773 } else
774 specific_buf = NULL;
775 k_cfg->layoutSpecific = specific_buf;
776
777 /* should do some kind of sanity check on the configuration.
778 * Store the sum of all the bytes in the last byte? */
779
780 #if 0
781 db1_printf(("Considering configuring the system.:%d 0x%x\n",
782 unit, p));
783 #endif
784
785 /* We need the pointer to this a little deeper, so stash it
786 * here... */
787
788 raidPtrs[unit]->proc = p;
789
790 /* configure the system */
791
792 raidPtrs[unit]->raidid = unit;
793 retcode = rf_Configure(raidPtrs[unit], k_cfg);
794
795
796 if (retcode == 0) {
797 retcode = raidinit(dev, raidPtrs[unit], unit);
798 rf_markalldirty( raidPtrs[unit] );
799 }
800 /* free the buffers. No return code here. */
801 if (k_cfg->layoutSpecificSize) {
802 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
803 }
804 RF_Free(k_cfg, sizeof(RF_Config_t));
805
806 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
807 retcode));
808
809 return (retcode);
810
811 /* shutdown the system */
812 case RAIDFRAME_SHUTDOWN:
813
814 if ((error = raidlock(rs)) != 0)
815 return (error);
816
817 /*
818 * If somebody has a partition mounted, we shouldn't
819 * shutdown.
820 */
821
822 part = DISKPART(dev);
823 pmask = (1 << part);
824 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
825 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
826 (rs->sc_dkdev.dk_copenmask & pmask))) {
827 raidunlock(rs);
828 return (EBUSY);
829 }
830
831 if (rf_debugKernelAccess) {
832 printf("call shutdown\n");
833 }
834 raidPtrs[unit]->proc = p; /* XXX necessary evil */
835
836 retcode = rf_Shutdown(raidPtrs[unit]);
837
838 db1_printf(("Done main shutdown\n"));
839
840 pool_destroy(&rs->sc_cbufpool);
841 db1_printf(("Done freeing component buffer freelist\n"));
842
843 /* It's no longer initialized... */
844 rs->sc_flags &= ~RAIDF_INITED;
845
846 /* Detach the disk. */
847 disk_detach(&rs->sc_dkdev);
848
849 raidunlock(rs);
850
851 return (retcode);
852 case RAIDFRAME_GET_COMPONENT_LABEL:
853 c_label_ptr = (RF_ComponentLabel_t **) data;
854 /* need to read the component label for the disk indicated
855 by row,column in component_label
856 XXX need to sanity check these values!!!
857 */
858
859 /* For practice, let's get it directly fromdisk, rather
860 than from the in-core copy */
861 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
862 (RF_ComponentLabel_t *));
863 if (component_label == NULL)
864 return (ENOMEM);
865
866 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
867
868 retcode = copyin( *c_label_ptr, component_label,
869 sizeof(RF_ComponentLabel_t));
870
871 if (retcode) {
872 return(retcode);
873 }
874
875 row = component_label->row;
876 printf("Row: %d\n",row);
877 if (row > raidPtrs[unit]->numRow) {
878 row = 0; /* XXX */
879 }
880 column = component_label->column;
881 printf("Column: %d\n",column);
882 if (column > raidPtrs[unit]->numCol) {
883 column = 0; /* XXX */
884 }
885
886 raidread_component_label(
887 raidPtrs[unit]->Disks[row][column].dev,
888 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
889 component_label );
890
891 retcode = copyout((caddr_t) component_label,
892 (caddr_t) *c_label_ptr,
893 sizeof(RF_ComponentLabel_t));
894 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
895 return (retcode);
896
897 case RAIDFRAME_SET_COMPONENT_LABEL:
898 component_label = (RF_ComponentLabel_t *) data;
899
900 /* XXX check the label for valid stuff... */
901 /* Note that some things *should not* get modified --
902 the user should be re-initing the labels instead of
903 trying to patch things.
904 */
905
906 printf("Got component label:\n");
907 printf("Version: %d\n",component_label->version);
908 printf("Serial Number: %d\n",component_label->serial_number);
909 printf("Mod counter: %d\n",component_label->mod_counter);
910 printf("Row: %d\n", component_label->row);
911 printf("Column: %d\n", component_label->column);
912 printf("Num Rows: %d\n", component_label->num_rows);
913 printf("Num Columns: %d\n", component_label->num_columns);
914 printf("Clean: %d\n", component_label->clean);
915 printf("Status: %d\n", component_label->status);
916
917 row = component_label->row;
918 column = component_label->column;
919
920 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
921 (column < 0) || (column > raidPtrs[unit]->numCol)) {
922 return(EINVAL);
923 }
924
925 /* XXX this isn't allowed to do anything for now :-) */
926 #if 0
927 raidwrite_component_label(
928 raidPtrs[unit]->Disks[row][column].dev,
929 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
930 component_label );
931 #endif
932 return (0);
933
934 case RAIDFRAME_INIT_LABELS:
935 component_label = (RF_ComponentLabel_t *) data;
936 /*
937 we only want the serial number from
938 the above. We get all the rest of the information
939 from the config that was used to create this RAID
940 set.
941 */
942
943 raidPtrs[unit]->serial_number = component_label->serial_number;
944 /* current version number */
945 ci_label.version = RF_COMPONENT_LABEL_VERSION;
946 ci_label.serial_number = component_label->serial_number;
947 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
948 ci_label.num_rows = raidPtrs[unit]->numRow;
949 ci_label.num_columns = raidPtrs[unit]->numCol;
950 ci_label.clean = RF_RAID_DIRTY; /* not clean */
951 ci_label.status = rf_ds_optimal; /* "It's good!" */
952
953 for(row=0;row<raidPtrs[unit]->numRow;row++) {
954 ci_label.row = row;
955 for(column=0;column<raidPtrs[unit]->numCol;column++) {
956 ci_label.column = column;
957 raidwrite_component_label(
958 raidPtrs[unit]->Disks[row][column].dev,
959 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
960 &ci_label );
961 }
962 }
963
964 return (retcode);
965
966 /* initialize all parity */
967 case RAIDFRAME_REWRITEPARITY:
968
969 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0)
970 return (EINVAL);
971 /* borrow the thread of the requesting process */
972 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
973 retcode = rf_RewriteParity(raidPtrs[unit]);
974 /* return I/O Error if the parity rewrite fails */
975
976 if (retcode) {
977 retcode = EIO;
978 } else {
979 /* set the clean bit! If we shutdown correctly,
980 the clean bit on each component label will get
981 set */
982 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
983 }
984 return (retcode);
985
986
987 case RAIDFRAME_ADD_HOT_SPARE:
988 sparePtr = (RF_SingleComponent_t *) data;
989 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
990 printf("Adding spare\n");
991 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
992 return(retcode);
993
994 case RAIDFRAME_REMOVE_HOT_SPARE:
995 return(retcode);
996
997 case RAIDFRAME_REBUILD_IN_PLACE:
998 componentPtr = (RF_SingleComponent_t *) data;
999 memcpy( &component, componentPtr,
1000 sizeof(RF_SingleComponent_t));
1001 row = component.row;
1002 column = component.column;
1003 printf("Rebuild: %d %d\n",row, column);
1004 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
1005 (column < 0) || (column > raidPtrs[unit]->numCol)) {
1006 return(EINVAL);
1007 }
1008 printf("Attempting a rebuild in place\n");
1009 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1010 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1011 return(retcode);
1012
1013 /* issue a test-unit-ready through raidframe to the indicated
1014 * device */
1015 #if 0 /* XXX not supported yet (ever?) */
1016 case RAIDFRAME_TUR:
1017 /* debug only */
1018 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1019 return (retcode);
1020 #endif
1021 case RAIDFRAME_GET_INFO:
1022 {
1023 RF_Raid_t *raid = raidPtrs[unit];
1024 RF_DeviceConfig_t *cfg, **ucfgp;
1025 int i, j, d;
1026
1027 if (!raid->valid)
1028 return (ENODEV);
1029 ucfgp = (RF_DeviceConfig_t **) data;
1030 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1031 (RF_DeviceConfig_t *));
1032 if (cfg == NULL)
1033 return (ENOMEM);
1034 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1035 cfg->rows = raid->numRow;
1036 cfg->cols = raid->numCol;
1037 cfg->ndevs = raid->numRow * raid->numCol;
1038 if (cfg->ndevs >= RF_MAX_DISKS) {
1039 cfg->ndevs = 0;
1040 return (ENOMEM);
1041 }
1042 cfg->nspares = raid->numSpare;
1043 if (cfg->nspares >= RF_MAX_DISKS) {
1044 cfg->nspares = 0;
1045 return (ENOMEM);
1046 }
1047 cfg->maxqdepth = raid->maxQueueDepth;
1048 d = 0;
1049 for (i = 0; i < cfg->rows; i++) {
1050 for (j = 0; j < cfg->cols; j++) {
1051 cfg->devs[d] = raid->Disks[i][j];
1052 d++;
1053 }
1054 }
1055 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1056 cfg->spares[i] = raid->Disks[0][j];
1057 }
1058 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1059 sizeof(RF_DeviceConfig_t));
1060 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1061
1062 return (retcode);
1063 }
1064 break;
1065
1066 case RAIDFRAME_RESET_ACCTOTALS:
1067 {
1068 RF_Raid_t *raid = raidPtrs[unit];
1069
1070 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1071 return (0);
1072 }
1073 break;
1074
1075 case RAIDFRAME_GET_ACCTOTALS:
1076 {
1077 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1078 RF_Raid_t *raid = raidPtrs[unit];
1079
1080 *totals = raid->acc_totals;
1081 return (0);
1082 }
1083 break;
1084
1085 case RAIDFRAME_KEEP_ACCTOTALS:
1086 {
1087 RF_Raid_t *raid = raidPtrs[unit];
1088 int *keep = (int *) data;
1089
1090 raid->keep_acc_totals = *keep;
1091 return (0);
1092 }
1093 break;
1094
1095 case RAIDFRAME_GET_SIZE:
1096 *(int *) data = raidPtrs[unit]->totalSectors;
1097 return (0);
1098
1099 #define RAIDFRAME_RECON 1
1100 /* XXX The above should probably be set somewhere else!! GO */
1101 #if RAIDFRAME_RECON > 0
1102
1103 /* fail a disk & optionally start reconstruction */
1104 case RAIDFRAME_FAIL_DISK:
1105 rr = (struct rf_recon_req *) data;
1106
1107 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1108 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1109 return (EINVAL);
1110
1111 printf("raid%d: Failing the disk: row: %d col: %d\n",
1112 unit, rr->row, rr->col);
1113
1114 /* make a copy of the recon request so that we don't rely on
1115 * the user's buffer */
1116 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1117 bcopy(rr, rrcopy, sizeof(*rr));
1118 rrcopy->raidPtr = (void *) raidPtrs[unit];
1119
1120 LOCK_RECON_Q_MUTEX();
1121 rrcopy->next = recon_queue;
1122 recon_queue = rrcopy;
1123 wakeup(&recon_queue);
1124 UNLOCK_RECON_Q_MUTEX();
1125
1126 return (0);
1127
1128 /* invoke a copyback operation after recon on whatever disk
1129 * needs it, if any */
1130 case RAIDFRAME_COPYBACK:
1131 /* borrow the current thread to get this done */
1132 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1133 rf_CopybackReconstructedData(raidPtrs[unit]);
1134 return (0);
1135
1136 /* return the percentage completion of reconstruction */
1137 case RAIDFRAME_CHECKRECON:
1138 row = *(int *) data;
1139 if (row < 0 || row >= raidPtrs[unit]->numRow)
1140 return (EINVAL);
1141 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1142 *(int *) data = 100;
1143 else
1144 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1145 return (0);
1146
1147 /* the sparetable daemon calls this to wait for the kernel to
1148 * need a spare table. this ioctl does not return until a
1149 * spare table is needed. XXX -- calling mpsleep here in the
1150 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1151 * -- I should either compute the spare table in the kernel,
1152 * or have a different -- XXX XXX -- interface (a different
1153 * character device) for delivering the table -- XXX */
1154 #if 0
1155 case RAIDFRAME_SPARET_WAIT:
1156 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1157 while (!rf_sparet_wait_queue)
1158 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1159 waitreq = rf_sparet_wait_queue;
1160 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1161 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1162
1163 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1164
1165 RF_Free(waitreq, sizeof(*waitreq));
1166 return (0);
1167
1168
1169 /* wakes up a process waiting on SPARET_WAIT and puts an error
1170 * code in it that will cause the dameon to exit */
1171 case RAIDFRAME_ABORT_SPARET_WAIT:
1172 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1173 waitreq->fcol = -1;
1174 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1175 waitreq->next = rf_sparet_wait_queue;
1176 rf_sparet_wait_queue = waitreq;
1177 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1178 wakeup(&rf_sparet_wait_queue);
1179 return (0);
1180
1181 /* used by the spare table daemon to deliver a spare table
1182 * into the kernel */
1183 case RAIDFRAME_SEND_SPARET:
1184
1185 /* install the spare table */
1186 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1187
1188 /* respond to the requestor. the return status of the spare
1189 * table installation is passed in the "fcol" field */
1190 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1191 waitreq->fcol = retcode;
1192 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1193 waitreq->next = rf_sparet_resp_queue;
1194 rf_sparet_resp_queue = waitreq;
1195 wakeup(&rf_sparet_resp_queue);
1196 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1197
1198 return (retcode);
1199 #endif
1200
1201
1202 #endif /* RAIDFRAME_RECON > 0 */
1203
1204 default:
1205 break; /* fall through to the os-specific code below */
1206
1207 }
1208
1209 if (!raidPtrs[unit]->valid)
1210 return (EINVAL);
1211
1212 /*
1213 * Add support for "regular" device ioctls here.
1214 */
1215
1216 switch (cmd) {
1217 case DIOCGDINFO:
1218 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1219 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1220 break;
1221
1222 case DIOCGPART:
1223 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1224 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1225 ((struct partinfo *) data)->part =
1226 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1227 break;
1228
1229 case DIOCWDINFO:
1230 db1_printf(("DIOCWDINFO\n"));
1231 case DIOCSDINFO:
1232 db1_printf(("DIOCSDINFO\n"));
1233 if ((error = raidlock(rs)) != 0)
1234 return (error);
1235
1236 rs->sc_flags |= RAIDF_LABELLING;
1237
1238 error = setdisklabel(rs->sc_dkdev.dk_label,
1239 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1240 if (error == 0) {
1241 if (cmd == DIOCWDINFO)
1242 error = writedisklabel(RAIDLABELDEV(dev),
1243 raidstrategy, rs->sc_dkdev.dk_label,
1244 rs->sc_dkdev.dk_cpulabel);
1245 }
1246 rs->sc_flags &= ~RAIDF_LABELLING;
1247
1248 raidunlock(rs);
1249
1250 if (error)
1251 return (error);
1252 break;
1253
1254 case DIOCWLABEL:
1255 db1_printf(("DIOCWLABEL\n"));
1256 if (*(int *) data != 0)
1257 rs->sc_flags |= RAIDF_WLABEL;
1258 else
1259 rs->sc_flags &= ~RAIDF_WLABEL;
1260 break;
1261
1262 case DIOCGDEFLABEL:
1263 db1_printf(("DIOCGDEFLABEL\n"));
1264 raidgetdefaultlabel(raidPtrs[unit], rs,
1265 (struct disklabel *) data);
1266 break;
1267
1268 default:
1269 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1270 }
1271 return (retcode);
1272
1273 }
1274
1275
1276 /* raidinit -- complete the rest of the initialization for the
1277 RAIDframe device. */
1278
1279
/*
 * raidinit: finish kernel-side initialization of a configured RAID set.
 *
 * dev     - device number of the raid device
 * raidPtr - the RAIDframe descriptor for this set (already configured)
 * unit    - index into raid_softc[] / raidPtrs[]
 *
 * Returns 0 (retcode is never set to anything else here).
 */
static int
raidinit(dev, raidPtr, unit)
	dev_t dev;
	RF_Raid_t *raidPtr;
	int unit;
{
	int retcode;
	/* int ix; */
	/* struct raidbuf *raidbp; */
	struct raid_softc *rs;

	retcode = 0;

	rs = &raid_softc[unit];
	/* per-unit pool of struct raidbuf shadow buffers (see RAIDGETBUF) */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* device name, e.g. "raid0"; also used as the disk name below */
	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	/* size in sectors, as computed by RAIDframe during configuration */
	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;

	return (retcode);
}
1319
1320 /*
1321 * This kernel thread never exits. It is created once, and persists
1322 * until the system reboots.
1323 */
1324
/*
 * Kernel thread servicing the reconstruction request queue.  It sleeps
 * until raidioctl(RAIDFRAME_FAIL_DISK) links a request onto recon_queue
 * and wakes us, then fails the indicated disk (and, if RF_FDFLAGS_RECON
 * is set, runs reconstruction to completion before looking for more work).
 * Never returns.
 */
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	/* NOTE(review): s is never splx()'d -- harmless only because this
	 * thread never exits the loop below. */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		/* re-acquire the mutex after every wakeup before re-testing
		 * the queue head, so the dequeue below is done under lock */
		while (!recon_queue) {
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			    "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* the request was RF_Malloc'd by the FAIL_DISK ioctl;
		 * ownership passed to us, so free it here */
		RF_Free(req, sizeof(*req));
	}
}
1359 /* wake up the daemon & tell it to get us a spare table
1360 * XXX
1361 * the entries in the queues should be tagged with the raidPtr
1362 * so that in the extremely rare case that two recons happen at once,
1363 * we know for which device were requesting a spare table
1364 * XXX
1365 */
/*
 * Post *req on the sparetable daemon's wait queue, wake the daemon, and
 * sleep until it delivers a response on rf_sparet_resp_queue (via the
 * RAIDFRAME_SEND_SPARET ioctl).  Returns the installation status that the
 * daemon passed back in the response's fcol field.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): unlike the disabled mpsleep() call below, tsleep()
	 * does not release rf_sparet_wait_mutex, so the mutex is nominally
	 * held across the sleep here -- confirm RF_LOCK_MUTEX is safe to
	 * hold while sleeping in this configuration. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	/* pop the daemon's response off the response queue; note that req
	 * now points at the response, not at the caller's request */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1396 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1397 * bp & passes it down.
1398 * any calls originating in the kernel must use non-blocking I/O
1399 * do some extra sanity checking to return "appropriate" error values for
1400 * certain conditions (to make some standard utilities work)
1401 */
/*
 * Translate a struct buf into an rf_DoAccess() call: make b_blkno absolute
 * to the raid device, sanity-check the range, and dispatch.  Writes are
 * forced synchronous (we tsleep on bp until completion); reads go async
 * and complete through cbFunc.  On a range or alignment error the buf is
 * failed with biodone() and the error returned.
 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void (*cbFunc) (struct buf *);
	void *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
			pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* sector count of the transfer; pb is 1 if b_bcount is not an
	 * exact multiple of the sector size (partial trailing sector) */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	/* end-of-device check; the (sum < x) comparisons catch unsigned
	 * wraparound of the addition above */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* reject transfers that are not sector-aligned in length */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));

	/*
	 * XXX For now, all writes are sync
	 */
	do_async = 1;
	if ((bp->b_flags & B_READ) == 0)
		do_async = 0;

	/* don't ever condition on bp->b_flags & B_WRITE. always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
		bp->b_data, (int) bp->b_resid));
#endif

	/*
	 * If we requested sync I/O, sleep here.
	 */
	if ((retcode == 0) && (do_async == 0))
		tsleep(bp, PRIBIO, "raidsyncio", 0);

	return (retcode);
}
1502 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1503
/*
 * Dispatch one RAIDframe disk-queue request to the underlying component:
 * allocate a shadow raidbuf from the per-unit pool, initialize its buf,
 * and hand it to the component's vnode via VOP_STRATEGY().  Completion is
 * delivered through KernelWakeupFunc().  NOP requests are completed
 * immediately without touching the hardware.  Always returns 0.
 * The disk queue should be locked by the caller.
 */
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	/* remember our queue so KernelWakeupFunc can find it via req */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* scrub stale error state left over from a previous use of the buf */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete immediately -- no physical I/O for a NOP */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the shadow buf to target the component device */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
1616 /* this is the callback function associated with a I/O invoked from
1617 kernel code.
1618 */
/*
 * I/O-completion callback for bufs issued by rf_DispatchKernelIO().
 * vbp is really the raidbuf's embedded rf_buf; recover the original
 * buf and the queue request from it, propagate error/resid state to
 * the original buf, mark the component failed on I/O error, return
 * the raidbuf to the pool, and notify RAIDframe of completion.
 * Runs at splbio.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any error from the component I/O to the original buf */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* account this physical I/O in the tracing record, if any */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the shadow raidbuf to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* tell RAIDframe this queue entry is done (status 1 == failed) */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1717
1718
1719
1720 /*
1721 * initialize a buf structure for doing an I/O in the kernel.
1722 */
/*
 * Initialize *bp for a component I/O of numSect sectors starting at
 * startSect on device `dev', reading/writing `buf', with cbFunc as the
 * b_iodone completion callback.  rw_flag supplies B_READ/B_WRITE (plus
 * any flags inherited from the originating buf); B_CALL is always set
 * so cbFunc fires at biodone time.  Panics if the byte count is zero.
 */
static void
InitBP(
    struct buf * bp,
    struct vnode * b_vp,
    unsigned rw_flag,
    dev_t dev,
    RF_SectorNum_t startSect,
    RF_SectorCount_t numSect,
    caddr_t buf,
    void (*cbFunc) (struct buf *),
    void *cbArg,
    int logBytesPerSector,
    struct proc * b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	db1_printf(("bp->b_dev is %d\n", dev));
	bp->b_un.b_addr = buf;
#if 0
	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
#endif

	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!\n");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_vp = b_vp;

	/* NOTE(review): cbArg is accepted but never stored in the buf;
	 * callers currently pass the same pointer as req anyway -- confirm
	 * this is intentional. */
}
1760 /* Extras... */
1761
/*
 * Stub for the CPU cycle-counter read used by RAIDframe timing code on
 * other platforms.  This port has no implementation, so it always
 * reports zero.
 */
unsigned int
rpcc()
{
	return 0;
}
#if 0
/*
 * Obsolete stub version of rf_GetSpareTableFromDaemon(); superseded by
 * the real implementation earlier in this file.  Dead code kept under
 * #if 0.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1782
1783 static void
1784 raidgetdefaultlabel(raidPtr, rs, lp)
1785 RF_Raid_t *raidPtr;
1786 struct raid_softc *rs;
1787 struct disklabel *lp;
1788 {
1789 db1_printf(("Building a default label...\n"));
1790 bzero(lp, sizeof(*lp));
1791
1792 /* fabricate a label... */
1793 lp->d_secperunit = raidPtr->totalSectors;
1794 lp->d_secsize = raidPtr->bytesPerSector;
1795 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1796 lp->d_ntracks = 1;
1797 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1798 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1799
1800 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1801 lp->d_type = DTYPE_RAID;
1802 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1803 lp->d_rpm = 3600;
1804 lp->d_interleave = 1;
1805 lp->d_flags = 0;
1806
1807 lp->d_partitions[RAW_PART].p_offset = 0;
1808 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1809 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1810 lp->d_npartitions = RAW_PART + 1;
1811
1812 lp->d_magic = DISKMAGIC;
1813 lp->d_magic2 = DISKMAGIC;
1814 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1815
1816 }
1817 /*
1818 * Read the disklabel from the raid device. If one is not present, fake one
1819 * up.
1820 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated default in case no label is on disk */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no usable on-disk label: synthesize one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%d)\n", rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%d)\n",
				    rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
1874 /*
1875 * Take care of things one might want to take care of in the event
1876 * that a disklabel isn't present.
1877 */
1878 static void
1879 raidmakedisklabel(rs)
1880 struct raid_softc *rs;
1881 {
1882 struct disklabel *lp = rs->sc_dkdev.dk_label;
1883 db1_printf(("Making a label..\n"));
1884
1885 /*
1886 * For historical reasons, if there's no disklabel present
1887 * the raw partition must be marked FS_BSDFFS.
1888 */
1889
1890 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1891
1892 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1893
1894 lp->d_checksum = dkcksum(lp);
1895 }
1896 /*
1897 * Lookup the provided name in the filesystem. If the file exists,
1898 * is a valid block device, and isn't being used by anyone else,
1899 * set *vpp to the file's vnode.
1900 * You'll find the original of this in ccd.c
1901 */
1902 int
1903 raidlookup(path, p, vpp)
1904 char *path;
1905 struct proc *p;
1906 struct vnode **vpp; /* result */
1907 {
1908 struct nameidata nd;
1909 struct vnode *vp;
1910 struct vattr va;
1911 int error;
1912
1913 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1914 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1915 #ifdef DEBUG
1916 printf("RAIDframe: vn_open returned %d\n", error);
1917 #endif
1918 return (error);
1919 }
1920 vp = nd.ni_vp;
1921 if (vp->v_usecount > 1) {
1922 VOP_UNLOCK(vp, 0);
1923 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1924 return (EBUSY);
1925 }
1926 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1927 VOP_UNLOCK(vp, 0);
1928 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1929 return (error);
1930 }
1931 /* XXX: eventually we should handle VREG, too. */
1932 if (va.va_type != VBLK) {
1933 VOP_UNLOCK(vp, 0);
1934 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1935 return (ENOTBLK);
1936 }
1937 VOP_UNLOCK(vp, 0);
1938 *vpp = vp;
1939 return (0);
1940 }
1941 /*
1942 * Wait interruptibly for an exclusive lock.
1943 *
1944 * XXX
1945 * Several drivers do this; it should be abstracted and made MP-safe.
1946 * (Hmm... where have we seen this warning before :-> GO )
1947 */
1948 static int
1949 raidlock(rs)
1950 struct raid_softc *rs;
1951 {
1952 int error;
1953
1954 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1955 rs->sc_flags |= RAIDF_WANTED;
1956 if ((error =
1957 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1958 return (error);
1959 }
1960 rs->sc_flags |= RAIDF_LOCKED;
1961 return (0);
1962 }
1963 /*
1964 * Unlock and wake up any waiters.
1965 */
1966 static void
1967 raidunlock(rs)
1968 struct raid_softc *rs;
1969 {
1970
1971 rs->sc_flags &= ~RAIDF_LOCKED;
1972 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1973 rs->sc_flags &= ~RAIDF_WANTED;
1974 wakeup(rs);
1975 }
1976 }
1977
1978
1979 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1980 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1981
1982 int
1983 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1984 {
1985 RF_ComponentLabel_t component_label;
1986 raidread_component_label(dev, b_vp, &component_label);
1987 component_label.mod_counter = mod_counter;
1988 component_label.clean = RF_RAID_CLEAN;
1989 raidwrite_component_label(dev, b_vp, &component_label);
1990 return(0);
1991 }
1992
1993
1994 int
1995 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1996 {
1997 RF_ComponentLabel_t component_label;
1998 raidread_component_label(dev, b_vp, &component_label);
1999 component_label.mod_counter = mod_counter;
2000 component_label.clean = RF_RAID_DIRTY;
2001 raidwrite_component_label(dev, b_vp, &component_label);
2002 return(0);
2003 }
2004
2005 /* ARGSUSED */
/*
 * Read the RAIDframe component label from `dev' into *component_label,
 * by issuing a raw read of RF_COMPONENT_INFO_SIZE bytes at byte offset
 * RF_COMPONENT_INFO_OFFSET directly through the block device's strategy
 * routine.  Returns 0 on success or the biowait() error.
 */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is in bytes elsewhere in this file, but is
	 * set to a sector count here -- confirm which units the underlying
	 * strategy routine expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* go straight to the component's strategy routine */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* done with the scratch buffer; let it age out of the cache */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2055 /* ARGSUSED */
/*
 * Write *component_label to the component-label area of `dev': a raw
 * write of RF_COMPONENT_INFO_SIZE bytes (label contents followed by
 * zero padding) at byte offset RF_COMPONENT_INFO_OFFSET, issued directly
 * through the block device's strategy routine.  Returns 0 on success or
 * the biowait() error.
 */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid set to a sector count here; see the
	 * matching note in raidread_component_label(). */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero the whole area so the tail past the label is well-defined */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2089
/*
 * Bump the set's modification counter and mark the component label of
 * every non-failed component dirty (typically done when the array goes
 * active, so an unclean shutdown is detectable).  Components whose label
 * says rf_ds_spared are skipped.  The large #if 0 region at the bottom
 * is an unfinished template for handling spare disks.
 */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2184
2185
2186 void
2187 rf_update_component_labels( raidPtr )
2188 RF_Raid_t *raidPtr;
2189 {
2190 RF_ComponentLabel_t c_label;
2191 int sparecol;
2192 int r,c;
2193 int i,j;
2194 int srow, scol;
2195
2196 srow = -1;
2197 scol = -1;
2198
2199 /* XXX should do extra checks to make sure things really are clean,
2200 rather than blindly setting the clean bit... */
2201
2202 raidPtr->mod_counter++;
2203
2204 for (r = 0; r < raidPtr->numRow; r++) {
2205 for (c = 0; c < raidPtr->numCol; c++) {
2206 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2207 raidread_component_label(
2208 raidPtr->Disks[r][c].dev,
2209 raidPtr->raid_cinfo[r][c].ci_vp,
2210 &c_label);
2211 /* make sure status is noted */
2212 c_label.status = rf_ds_optimal;
2213 raidwrite_component_label(
2214 raidPtr->Disks[r][c].dev,
2215 raidPtr->raid_cinfo[r][c].ci_vp,
2216 &c_label);
2217 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2218 raidmarkclean(
2219 raidPtr->Disks[r][c].dev,
2220 raidPtr->raid_cinfo[r][c].ci_vp,
2221 raidPtr->mod_counter);
2222 }
2223 }
2224 /* else we don't touch it.. */
2225 #if 0
2226 else if (raidPtr->Disks[r][c].status !=
2227 rf_ds_failed) {
2228 raidread_component_label(
2229 raidPtr->Disks[r][c].dev,
2230 raidPtr->raid_cinfo[r][c].ci_vp,
2231 &c_label);
2232 /* make sure status is noted */
2233 c_label.status =
2234 raidPtr->Disks[r][c].status;
2235 raidwrite_component_label(
2236 raidPtr->Disks[r][c].dev,
2237 raidPtr->raid_cinfo[r][c].ci_vp,
2238 &c_label);
2239 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2240 raidmarkclean(
2241 raidPtr->Disks[r][c].dev,
2242 raidPtr->raid_cinfo[r][c].ci_vp,
2243 raidPtr->mod_counter);
2244 }
2245 }
2246 #endif
2247 }
2248 }
2249
2250 for( c = 0; c < raidPtr->numSpare ; c++) {
2251 sparecol = raidPtr->numCol + c;
2252 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2253 /*
2254
2255 we claim this disk is "optimal" if it's
2256 rf_ds_used_spare, as that means it should be
2257 directly substitutable for the disk it replaced.
2258 We note that too...
2259
2260 */
2261
2262 for(i=0;i<raidPtr->numRow;i++) {
2263 for(j=0;j<raidPtr->numCol;j++) {
2264 if ((raidPtr->Disks[i][j].spareRow ==
2265 0) &&
2266 (raidPtr->Disks[i][j].spareCol ==
2267 sparecol)) {
2268 srow = i;
2269 scol = j;
2270 break;
2271 }
2272 }
2273 }
2274
2275 raidread_component_label(
2276 raidPtr->Disks[0][sparecol].dev,
2277 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2278 &c_label);
2279 /* make sure status is noted */
2280 c_label.version = RF_COMPONENT_LABEL_VERSION;
2281 c_label.mod_counter = raidPtr->mod_counter;
2282 c_label.serial_number = raidPtr->serial_number;
2283 c_label.row = srow;
2284 c_label.column = scol;
2285 c_label.num_rows = raidPtr->numRow;
2286 c_label.num_columns = raidPtr->numCol;
2287 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2288 c_label.status = rf_ds_optimal;
2289 raidwrite_component_label(
2290 raidPtr->Disks[0][sparecol].dev,
2291 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2292 &c_label);
2293 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2294 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2295 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2296 raidPtr->mod_counter);
2297 }
2298 }
2299 }
2300 /* printf("Component labels updated\n"); */
2301 }
2302