/*	$NetBSD: rf_netbsdkintf.c,v 1.19 1999/06/13 20:36:17 oster Exp $	*/
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
/* Debug verbosity knob for the dbN_printf macros below. */
int     rf_kdebug_level = 0;

/* States for rf_kbooted: has the RAIDframe core been booted yet? */
#define RFK_BOOT_NONE 0		/* not yet booted */
#define RFK_BOOT_GOOD 1		/* rf_BootRaidframe() succeeded */
#define RFK_BOOT_BAD  2		/* boot attempted and failed */
static int rf_kbooted = RFK_BOOT_NONE;

#ifdef DEBUG
/* Leveled debug printfs: dbN_printf fires when rf_kdebug_level >= N. */
#define db0_printf(a) printf a
#define db_printf(a) if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else				/* DEBUG */
#define db0_printf(a) printf a
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif				/* DEBUG */

static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
						 * reconstruction
						 * requests */


decl_simple_lock_data(, recon_queue_mutex)
#define LOCK_RECON_Q_MUTEX()   simple_lock(&recon_queue_mutex)
#define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
195
/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    dev_t dev, RF_SectorNum_t startSect,
    RF_SectorCount_t numSect, caddr_t buf,
    void (*cbFunc) (struct buf *), void *cbArg,
    int logBytesPerSector, struct proc * b_proc);

/* Queue-debugging printfs, active only when rf_queueDebug is set. */
#define Dprintf0(s)         if (rf_queueDebug) \
     rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf1(s,a)         if (rf_queueDebug) \
     rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b)         if (rf_queueDebug) \
     rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c)         if (rf_queueDebug) \
     rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)

/* Component-label clean/dirty marking helpers. */
int     raidmarkclean(dev_t dev, struct vnode *b_vp, int);
int     raidmarkdirty(dev_t dev, struct vnode *b_vp, int);

/* Autoconfiguration / devsw entry points. */
void    raidattach __P((int));
int     raidsize __P((dev_t));

void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
static int raidinit __P((dev_t, RF_Raid_t *, int));

int     raidopen __P((dev_t, int, int, struct proc *));
int     raidclose __P((dev_t, int, int, struct proc *));
int     raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int     raidwrite __P((dev_t, struct uio *, int));
int     raidread __P((dev_t, struct uio *, int));
void    raidstrategy __P((struct buf *));
int     raiddump __P((dev_t, daddr_t, caddr_t, size_t));

/* On-disk component-label I/O. */
int     raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
int     raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
void    rf_update_component_labels( RF_Raid_t *);
/*
 * Pilfered from ccd.c
 */

/*
 * Per-component I/O buffer: wraps a fresh struct buf for a single
 * component transfer and links it back to the original request.
 */
struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};


/* Allocate/free a raidbuf from the unit's component-buffer pool. */
#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define	RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

/* Per-unit software state, one per configured RAID device. */
struct raid_softc {
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	dev_t   sc_dev;		/* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)
static int numraid = 0;		/* number of units set up by raidattach() */

/* Device node used for disklabel operations: always the raw partition. */
#define	RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));
int     raidlookup __P((char *, struct proc * p, struct vnode **));

static void rf_markalldirty __P((RF_Raid_t *));
287
288 void
289 raidattach(num)
290 int num;
291 {
292 int raidID;
293 int i, rc;
294
295 #ifdef DEBUG
296 printf("raidattach: Asked for %d units\n", num);
297 #endif
298
299 if (num <= 0) {
300 #ifdef DIAGNOSTIC
301 panic("raidattach: count <= 0");
302 #endif
303 return;
304 }
305 /* This is where all the initialization stuff gets done. */
306
307 /* Make some space for requested number of units... */
308
309 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
310 if (raidPtrs == NULL) {
311 panic("raidPtrs is NULL!!\n");
312 }
313
314 rc = rf_mutex_init(&rf_sparet_wait_mutex);
315 if (rc) {
316 RF_PANIC();
317 }
318
319 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
320 recon_queue = NULL;
321
322 for (i = 0; i < numraid; i++)
323 raidPtrs[i] = NULL;
324 rc = rf_BootRaidframe();
325 if (rc == 0)
326 printf("Kernelized RAIDframe activated\n");
327 else
328 panic("Serious error booting RAID!!\n");
329
330 rf_kbooted = RFK_BOOT_GOOD;
331
332 /* put together some datastructures like the CCD device does.. This
333 * lets us lock the device and what-not when it gets opened. */
334
335 raid_softc = (struct raid_softc *)
336 malloc(num * sizeof(struct raid_softc),
337 M_RAIDFRAME, M_NOWAIT);
338 if (raid_softc == NULL) {
339 printf("WARNING: no memory for RAIDframe driver\n");
340 return;
341 }
342 numraid = num;
343 bzero(raid_softc, num * sizeof(struct raid_softc));
344
345 for (raidID = 0; raidID < num; raidID++) {
346 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
347 (RF_Raid_t *));
348 if (raidPtrs[raidID] == NULL) {
349 printf("raidPtrs[%d] is NULL\n", raidID);
350 }
351 }
352 }
353
354
355 int
356 raidsize(dev)
357 dev_t dev;
358 {
359 struct raid_softc *rs;
360 struct disklabel *lp;
361 int part, unit, omask, size;
362
363 unit = raidunit(dev);
364 if (unit >= numraid)
365 return (-1);
366 rs = &raid_softc[unit];
367
368 if ((rs->sc_flags & RAIDF_INITED) == 0)
369 return (-1);
370
371 part = DISKPART(dev);
372 omask = rs->sc_dkdev.dk_openmask & (1 << part);
373 lp = rs->sc_dkdev.dk_label;
374
375 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
376 return (-1);
377
378 if (lp->d_partitions[part].p_fstype != FS_SWAP)
379 size = -1;
380 else
381 size = lp->d_partitions[part].p_size *
382 (lp->d_secsize / DEV_BSIZE);
383
384 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
385 return (-1);
386
387 return (size);
388
389 }
390
391 int
392 raiddump(dev, blkno, va, size)
393 dev_t dev;
394 daddr_t blkno;
395 caddr_t va;
396 size_t size;
397 {
398 /* Not implemented. */
399 return ENXIO;
400 }
/* ARGSUSED */
/*
 * raidopen: open routine for both the block and character raid devices.
 *
 * Takes the per-unit lock, re-reads the disklabel on the first open of
 * a configured unit, validates that the requested partition exists,
 * records the open in the appropriate (char/block) open mask, and on
 * the very first open of a configured unit marks all component labels
 * dirty so an unclean shutdown can be detected later.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Serialize against concurrent open/close/configure. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
480 /* ARGSUSED */
481 int
482 raidclose(dev, flags, fmt, p)
483 dev_t dev;
484 int flags, fmt;
485 struct proc *p;
486 {
487 int unit = raidunit(dev);
488 struct raid_softc *rs;
489 int error = 0;
490 int part;
491
492 if (unit >= numraid)
493 return (ENXIO);
494 rs = &raid_softc[unit];
495
496 if ((error = raidlock(rs)) != 0)
497 return (error);
498
499 part = DISKPART(dev);
500
501 /* ...that much closer to allowing unconfiguration... */
502 switch (fmt) {
503 case S_IFCHR:
504 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
505 break;
506
507 case S_IFBLK:
508 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
509 break;
510 }
511 rs->sc_dkdev.dk_openmask =
512 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
513
514 if ((rs->sc_dkdev.dk_openmask == 0) &&
515 ((rs->sc_flags & RAIDF_INITED) != 0)) {
516 /* Last one... device is not unconfigured yet.
517 Device shutdown has taken care of setting the
518 clean bits if RAIDF_INITED is not set
519 mark things as clean... */
520 rf_update_component_labels( raidPtrs[unit] );
521 }
522
523 raidunlock(rs);
524 return (0);
525
526 }
527
528 void
529 raidstrategy(bp)
530 register struct buf *bp;
531 {
532 register int s;
533
534 unsigned int raidID = raidunit(bp->b_dev);
535 RF_Raid_t *raidPtr;
536 struct raid_softc *rs = &raid_softc[raidID];
537 struct disklabel *lp;
538 int wlabel;
539
540 #if 0
541 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
542 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
543 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
544 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
545 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
546
547 if (bp->b_flags & B_READ)
548 db1_printf(("READ\n"));
549 else
550 db1_printf(("WRITE\n"));
551 #endif
552 if (rf_kbooted != RFK_BOOT_GOOD)
553 return;
554 if (raidID >= numraid || !raidPtrs[raidID]) {
555 bp->b_error = ENODEV;
556 bp->b_flags |= B_ERROR;
557 bp->b_resid = bp->b_bcount;
558 biodone(bp);
559 return;
560 }
561 raidPtr = raidPtrs[raidID];
562 if (!raidPtr->valid) {
563 bp->b_error = ENODEV;
564 bp->b_flags |= B_ERROR;
565 bp->b_resid = bp->b_bcount;
566 biodone(bp);
567 return;
568 }
569 if (bp->b_bcount == 0) {
570 db1_printf(("b_bcount is zero..\n"));
571 biodone(bp);
572 return;
573 }
574 lp = rs->sc_dkdev.dk_label;
575
576 /*
577 * Do bounds checking and adjust transfer. If there's an
578 * error, the bounds check will flag that for us.
579 */
580
581 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
582 if (DISKPART(bp->b_dev) != RAW_PART)
583 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
584 db1_printf(("Bounds check failed!!:%d %d\n",
585 (int) bp->b_blkno, (int) wlabel));
586 biodone(bp);
587 return;
588 }
589 s = splbio(); /* XXX Needed? */
590 db1_printf(("Beginning strategy...\n"));
591
592 bp->b_resid = 0;
593 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
594 NULL, NULL, NULL);
595 if (bp->b_error) {
596 bp->b_flags |= B_ERROR;
597 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
598 bp->b_error));
599 }
600 splx(s);
601 #if 0
602 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
603 bp, bp->b_data,
604 (int) bp->b_bcount, (int) bp->b_resid));
605 #endif
606 }
607 /* ARGSUSED */
608 int
609 raidread(dev, uio, flags)
610 dev_t dev;
611 struct uio *uio;
612 int flags;
613 {
614 int unit = raidunit(dev);
615 struct raid_softc *rs;
616 int part;
617
618 if (unit >= numraid)
619 return (ENXIO);
620 rs = &raid_softc[unit];
621
622 if ((rs->sc_flags & RAIDF_INITED) == 0)
623 return (ENXIO);
624 part = DISKPART(dev);
625
626 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
627
628 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
629
630 }
631 /* ARGSUSED */
632 int
633 raidwrite(dev, uio, flags)
634 dev_t dev;
635 struct uio *uio;
636 int flags;
637 {
638 int unit = raidunit(dev);
639 struct raid_softc *rs;
640
641 if (unit >= numraid)
642 return (ENXIO);
643 rs = &raid_softc[unit];
644
645 if ((rs->sc_flags & RAIDF_INITED) == 0)
646 return (ENXIO);
647 db1_printf(("raidwrite\n"));
648 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
649
650 }
651
652 int
653 raidioctl(dev, cmd, data, flag, p)
654 dev_t dev;
655 u_long cmd;
656 caddr_t data;
657 int flag;
658 struct proc *p;
659 {
660 int unit = raidunit(dev);
661 int error = 0;
662 int part, pmask;
663 struct raid_softc *rs;
664 #if 0
665 int r, c;
666 #endif
667 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
668
669 /* struct ccdbuf *cbp; */
670 /* struct raidbuf *raidbp; */
671 RF_Config_t *k_cfg, *u_cfg;
672 u_char *specific_buf;
673 int retcode = 0;
674 int row;
675 int column;
676 struct rf_recon_req *rrcopy, *rr;
677 RF_ComponentLabel_t *component_label;
678 RF_ComponentLabel_t ci_label;
679 RF_ComponentLabel_t **c_label_ptr;
680 RF_SingleComponent_t *sparePtr,*componentPtr;
681 RF_SingleComponent_t hot_spare;
682 RF_SingleComponent_t component;
683
684 if (unit >= numraid)
685 return (ENXIO);
686 rs = &raid_softc[unit];
687
688 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
689 (int) DISKPART(dev), (int) unit, (int) cmd));
690
691 /* Must be open for writes for these commands... */
692 switch (cmd) {
693 case DIOCSDINFO:
694 case DIOCWDINFO:
695 case DIOCWLABEL:
696 if ((flag & FWRITE) == 0)
697 return (EBADF);
698 }
699
700 /* Must be initialized for these... */
701 switch (cmd) {
702 case DIOCGDINFO:
703 case DIOCSDINFO:
704 case DIOCWDINFO:
705 case DIOCGPART:
706 case DIOCWLABEL:
707 case DIOCGDEFLABEL:
708 case RAIDFRAME_SHUTDOWN:
709 case RAIDFRAME_REWRITEPARITY:
710 case RAIDFRAME_GET_INFO:
711 case RAIDFRAME_RESET_ACCTOTALS:
712 case RAIDFRAME_GET_ACCTOTALS:
713 case RAIDFRAME_KEEP_ACCTOTALS:
714 case RAIDFRAME_GET_SIZE:
715 case RAIDFRAME_FAIL_DISK:
716 case RAIDFRAME_COPYBACK:
717 case RAIDFRAME_CHECKRECON:
718 case RAIDFRAME_GET_COMPONENT_LABEL:
719 case RAIDFRAME_SET_COMPONENT_LABEL:
720 case RAIDFRAME_ADD_HOT_SPARE:
721 case RAIDFRAME_REMOVE_HOT_SPARE:
722 case RAIDFRAME_INIT_LABELS:
723 case RAIDFRAME_REBUILD_IN_PLACE:
724 if ((rs->sc_flags & RAIDF_INITED) == 0)
725 return (ENXIO);
726 }
727
728 switch (cmd) {
729
730
731 /* configure the system */
732 case RAIDFRAME_CONFIGURE:
733
734 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
735 /* copy-in the configuration information */
736 /* data points to a pointer to the configuration structure */
737 u_cfg = *((RF_Config_t **) data);
738 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
739 if (k_cfg == NULL) {
740 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
741 return (ENOMEM);
742 }
743 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
744 sizeof(RF_Config_t));
745 if (retcode) {
746 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
747 retcode));
748 return (retcode);
749 }
750 /* allocate a buffer for the layout-specific data, and copy it
751 * in */
752 if (k_cfg->layoutSpecificSize) {
753 if (k_cfg->layoutSpecificSize > 10000) {
754 /* sanity check */
755 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
756 return (EINVAL);
757 }
758 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
759 (u_char *));
760 if (specific_buf == NULL) {
761 RF_Free(k_cfg, sizeof(RF_Config_t));
762 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
763 return (ENOMEM);
764 }
765 retcode = copyin(k_cfg->layoutSpecific,
766 (caddr_t) specific_buf,
767 k_cfg->layoutSpecificSize);
768 if (retcode) {
769 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
770 retcode));
771 return (retcode);
772 }
773 } else
774 specific_buf = NULL;
775 k_cfg->layoutSpecific = specific_buf;
776
777 /* should do some kind of sanity check on the configuration.
778 * Store the sum of all the bytes in the last byte? */
779
780 #if 0
781 db1_printf(("Considering configuring the system.:%d 0x%x\n",
782 unit, p));
783 #endif
784
785 /* We need the pointer to this a little deeper, so stash it
786 * here... */
787
788 raidPtrs[unit]->proc = p;
789
790 /* configure the system */
791
792 raidPtrs[unit]->raidid = unit;
793 retcode = rf_Configure(raidPtrs[unit], k_cfg);
794
795
796 if (retcode == 0) {
797 retcode = raidinit(dev, raidPtrs[unit], unit);
798 rf_markalldirty( raidPtrs[unit] );
799 }
800 /* free the buffers. No return code here. */
801 if (k_cfg->layoutSpecificSize) {
802 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
803 }
804 RF_Free(k_cfg, sizeof(RF_Config_t));
805
806 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
807 retcode));
808
809 return (retcode);
810
811 /* shutdown the system */
812 case RAIDFRAME_SHUTDOWN:
813
814 if ((error = raidlock(rs)) != 0)
815 return (error);
816
817 /*
818 * If somebody has a partition mounted, we shouldn't
819 * shutdown.
820 */
821
822 part = DISKPART(dev);
823 pmask = (1 << part);
824 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
825 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
826 (rs->sc_dkdev.dk_copenmask & pmask))) {
827 raidunlock(rs);
828 return (EBUSY);
829 }
830
831 if (rf_debugKernelAccess) {
832 printf("call shutdown\n");
833 }
834 raidPtrs[unit]->proc = p; /* XXX necessary evil */
835
836 retcode = rf_Shutdown(raidPtrs[unit]);
837
838 db1_printf(("Done main shutdown\n"));
839
840 pool_destroy(&rs->sc_cbufpool);
841 db1_printf(("Done freeing component buffer freelist\n"));
842
843 /* It's no longer initialized... */
844 rs->sc_flags &= ~RAIDF_INITED;
845
846 /* Detach the disk. */
847 disk_detach(&rs->sc_dkdev);
848
849 raidunlock(rs);
850
851 return (retcode);
852 case RAIDFRAME_GET_COMPONENT_LABEL:
853 c_label_ptr = (RF_ComponentLabel_t **) data;
854 /* need to read the component label for the disk indicated
855 by row,column in component_label
856 XXX need to sanity check these values!!!
857 */
858
		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
861 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
862 (RF_ComponentLabel_t *));
863 if (component_label == NULL)
864 return (ENOMEM);
865
866 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
867
868 retcode = copyin( *c_label_ptr, component_label,
869 sizeof(RF_ComponentLabel_t));
870
871 if (retcode) {
872 return(retcode);
873 }
874
875 row = component_label->row;
876 printf("Row: %d\n",row);
877 if (row > raidPtrs[unit]->numRow) {
878 row = 0; /* XXX */
879 }
880 column = component_label->column;
881 printf("Column: %d\n",column);
882 if (column > raidPtrs[unit]->numCol) {
883 column = 0; /* XXX */
884 }
885
886 raidread_component_label(
887 raidPtrs[unit]->Disks[row][column].dev,
888 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
889 component_label );
890
891 retcode = copyout((caddr_t) component_label,
892 (caddr_t) *c_label_ptr,
893 sizeof(RF_ComponentLabel_t));
894 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
895 return (retcode);
896
897 case RAIDFRAME_SET_COMPONENT_LABEL:
898 component_label = (RF_ComponentLabel_t *) data;
899
900 /* XXX check the label for valid stuff... */
901 /* Note that some things *should not* get modified --
902 the user should be re-initing the labels instead of
903 trying to patch things.
904 */
905
906 printf("Got component label:\n");
907 printf("Version: %d\n",component_label->version);
908 printf("Serial Number: %d\n",component_label->serial_number);
909 printf("Mod counter: %d\n",component_label->mod_counter);
910 printf("Row: %d\n", component_label->row);
911 printf("Column: %d\n", component_label->column);
912 printf("Num Rows: %d\n", component_label->num_rows);
913 printf("Num Columns: %d\n", component_label->num_columns);
914 printf("Clean: %d\n", component_label->clean);
915 printf("Status: %d\n", component_label->status);
916
917 row = component_label->row;
918 column = component_label->column;
919
920 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
921 (column < 0) || (column > raidPtrs[unit]->numCol)) {
922 return(EINVAL);
923 }
924
925 /* XXX this isn't allowed to do anything for now :-) */
926 #if 0
927 raidwrite_component_label(
928 raidPtrs[unit]->Disks[row][column].dev,
929 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
930 component_label );
931 #endif
932 return (0);
933
934 case RAIDFRAME_INIT_LABELS:
935 component_label = (RF_ComponentLabel_t *) data;
936 /*
937 we only want the serial number from
938 the above. We get all the rest of the information
939 from the config that was used to create this RAID
940 set.
941 */
942
943 raidPtrs[unit]->serial_number = component_label->serial_number;
944 /* current version number */
945 ci_label.version = RF_COMPONENT_LABEL_VERSION;
946 ci_label.serial_number = component_label->serial_number;
947 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
948 ci_label.num_rows = raidPtrs[unit]->numRow;
949 ci_label.num_columns = raidPtrs[unit]->numCol;
950 ci_label.clean = RF_RAID_DIRTY; /* not clean */
951 ci_label.status = rf_ds_optimal; /* "It's good!" */
952
953 for(row=0;row<raidPtrs[unit]->numRow;row++) {
954 ci_label.row = row;
955 for(column=0;column<raidPtrs[unit]->numCol;column++) {
956 ci_label.column = column;
957 raidwrite_component_label(
958 raidPtrs[unit]->Disks[row][column].dev,
959 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
960 &ci_label );
961 }
962 }
963
964 return (retcode);
965
966 /* initialize all parity */
967 case RAIDFRAME_REWRITEPARITY:
968
969 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
970 /* Parity for RAID 0 is trivially correct */
971 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
972 return(0);
973 }
974
975 /* borrow the thread of the requesting process */
976 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
977 retcode = rf_RewriteParity(raidPtrs[unit]);
978 /* return I/O Error if the parity rewrite fails */
979
980 if (retcode) {
981 retcode = EIO;
982 } else {
983 /* set the clean bit! If we shutdown correctly,
984 the clean bit on each component label will get
985 set */
986 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
987 }
988 return (retcode);
989
990
991 case RAIDFRAME_ADD_HOT_SPARE:
992 sparePtr = (RF_SingleComponent_t *) data;
993 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
994 printf("Adding spare\n");
995 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
996 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
997 return(retcode);
998
999 case RAIDFRAME_REMOVE_HOT_SPARE:
1000 return(retcode);
1001
1002 case RAIDFRAME_REBUILD_IN_PLACE:
1003 componentPtr = (RF_SingleComponent_t *) data;
1004 memcpy( &component, componentPtr,
1005 sizeof(RF_SingleComponent_t));
1006 row = component.row;
1007 column = component.column;
1008 printf("Rebuild: %d %d\n",row, column);
1009 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
1010 (column < 0) || (column > raidPtrs[unit]->numCol)) {
1011 return(EINVAL);
1012 }
1013 printf("Attempting a rebuild in place\n");
1014 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1015 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1016 return(retcode);
1017
1018 /* issue a test-unit-ready through raidframe to the indicated
1019 * device */
1020 #if 0 /* XXX not supported yet (ever?) */
1021 case RAIDFRAME_TUR:
1022 /* debug only */
1023 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1024 return (retcode);
1025 #endif
1026 case RAIDFRAME_GET_INFO:
1027 {
1028 RF_Raid_t *raid = raidPtrs[unit];
1029 RF_DeviceConfig_t *cfg, **ucfgp;
1030 int i, j, d;
1031
1032 if (!raid->valid)
1033 return (ENODEV);
1034 ucfgp = (RF_DeviceConfig_t **) data;
1035 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1036 (RF_DeviceConfig_t *));
1037 if (cfg == NULL)
1038 return (ENOMEM);
1039 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1040 cfg->rows = raid->numRow;
1041 cfg->cols = raid->numCol;
1042 cfg->ndevs = raid->numRow * raid->numCol;
1043 if (cfg->ndevs >= RF_MAX_DISKS) {
1044 cfg->ndevs = 0;
1045 return (ENOMEM);
1046 }
1047 cfg->nspares = raid->numSpare;
1048 if (cfg->nspares >= RF_MAX_DISKS) {
1049 cfg->nspares = 0;
1050 return (ENOMEM);
1051 }
1052 cfg->maxqdepth = raid->maxQueueDepth;
1053 d = 0;
1054 for (i = 0; i < cfg->rows; i++) {
1055 for (j = 0; j < cfg->cols; j++) {
1056 cfg->devs[d] = raid->Disks[i][j];
1057 d++;
1058 }
1059 }
1060 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1061 cfg->spares[i] = raid->Disks[0][j];
1062 }
1063 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1064 sizeof(RF_DeviceConfig_t));
1065 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1066
1067 return (retcode);
1068 }
1069 break;
1070
1071 case RAIDFRAME_RESET_ACCTOTALS:
1072 {
1073 RF_Raid_t *raid = raidPtrs[unit];
1074
1075 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1076 return (0);
1077 }
1078 break;
1079
1080 case RAIDFRAME_GET_ACCTOTALS:
1081 {
1082 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1083 RF_Raid_t *raid = raidPtrs[unit];
1084
1085 *totals = raid->acc_totals;
1086 return (0);
1087 }
1088 break;
1089
1090 case RAIDFRAME_KEEP_ACCTOTALS:
1091 {
1092 RF_Raid_t *raid = raidPtrs[unit];
1093 int *keep = (int *) data;
1094
1095 raid->keep_acc_totals = *keep;
1096 return (0);
1097 }
1098 break;
1099
1100 case RAIDFRAME_GET_SIZE:
1101 *(int *) data = raidPtrs[unit]->totalSectors;
1102 return (0);
1103
1104 #define RAIDFRAME_RECON 1
1105 /* XXX The above should probably be set somewhere else!! GO */
1106 #if RAIDFRAME_RECON > 0
1107
1108 /* fail a disk & optionally start reconstruction */
1109 case RAIDFRAME_FAIL_DISK:
1110 rr = (struct rf_recon_req *) data;
1111
1112 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1113 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1114 return (EINVAL);
1115
1116 printf("raid%d: Failing the disk: row: %d col: %d\n",
1117 unit, rr->row, rr->col);
1118
1119 /* make a copy of the recon request so that we don't rely on
1120 * the user's buffer */
1121 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1122 bcopy(rr, rrcopy, sizeof(*rr));
1123 rrcopy->raidPtr = (void *) raidPtrs[unit];
1124
1125 LOCK_RECON_Q_MUTEX();
1126 rrcopy->next = recon_queue;
1127 recon_queue = rrcopy;
1128 wakeup(&recon_queue);
1129 UNLOCK_RECON_Q_MUTEX();
1130
1131 return (0);
1132
1133 /* invoke a copyback operation after recon on whatever disk
1134 * needs it, if any */
1135 case RAIDFRAME_COPYBACK:
1136 /* borrow the current thread to get this done */
1137 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1138 rf_CopybackReconstructedData(raidPtrs[unit]);
1139 return (0);
1140
1141 /* return the percentage completion of reconstruction */
1142 case RAIDFRAME_CHECKRECON:
1143 row = *(int *) data;
1144 if (row < 0 || row >= raidPtrs[unit]->numRow)
1145 return (EINVAL);
1146 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1147 *(int *) data = 100;
1148 else
1149 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1150 return (0);
1151
1152 /* the sparetable daemon calls this to wait for the kernel to
1153 * need a spare table. this ioctl does not return until a
1154 * spare table is needed. XXX -- calling mpsleep here in the
1155 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1156 * -- I should either compute the spare table in the kernel,
1157 * or have a different -- XXX XXX -- interface (a different
1158 * character device) for delivering the table -- XXX */
1159 #if 0
1160 case RAIDFRAME_SPARET_WAIT:
1161 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1162 while (!rf_sparet_wait_queue)
1163 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1164 waitreq = rf_sparet_wait_queue;
1165 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1166 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1167
1168 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1169
1170 RF_Free(waitreq, sizeof(*waitreq));
1171 return (0);
1172
1173
1174 /* wakes up a process waiting on SPARET_WAIT and puts an error
1175 * code in it that will cause the dameon to exit */
1176 case RAIDFRAME_ABORT_SPARET_WAIT:
1177 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1178 waitreq->fcol = -1;
1179 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1180 waitreq->next = rf_sparet_wait_queue;
1181 rf_sparet_wait_queue = waitreq;
1182 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1183 wakeup(&rf_sparet_wait_queue);
1184 return (0);
1185
1186 /* used by the spare table daemon to deliver a spare table
1187 * into the kernel */
1188 case RAIDFRAME_SEND_SPARET:
1189
1190 /* install the spare table */
1191 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1192
1193 /* respond to the requestor. the return status of the spare
1194 * table installation is passed in the "fcol" field */
1195 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1196 waitreq->fcol = retcode;
1197 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1198 waitreq->next = rf_sparet_resp_queue;
1199 rf_sparet_resp_queue = waitreq;
1200 wakeup(&rf_sparet_resp_queue);
1201 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1202
1203 return (retcode);
1204 #endif
1205
1206
1207 #endif /* RAIDFRAME_RECON > 0 */
1208
1209 default:
1210 break; /* fall through to the os-specific code below */
1211
1212 }
1213
1214 if (!raidPtrs[unit]->valid)
1215 return (EINVAL);
1216
1217 /*
1218 * Add support for "regular" device ioctls here.
1219 */
1220
1221 switch (cmd) {
1222 case DIOCGDINFO:
1223 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1224 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1225 break;
1226
1227 case DIOCGPART:
1228 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1229 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1230 ((struct partinfo *) data)->part =
1231 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1232 break;
1233
1234 case DIOCWDINFO:
1235 db1_printf(("DIOCWDINFO\n"));
1236 case DIOCSDINFO:
1237 db1_printf(("DIOCSDINFO\n"));
1238 if ((error = raidlock(rs)) != 0)
1239 return (error);
1240
1241 rs->sc_flags |= RAIDF_LABELLING;
1242
1243 error = setdisklabel(rs->sc_dkdev.dk_label,
1244 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1245 if (error == 0) {
1246 if (cmd == DIOCWDINFO)
1247 error = writedisklabel(RAIDLABELDEV(dev),
1248 raidstrategy, rs->sc_dkdev.dk_label,
1249 rs->sc_dkdev.dk_cpulabel);
1250 }
1251 rs->sc_flags &= ~RAIDF_LABELLING;
1252
1253 raidunlock(rs);
1254
1255 if (error)
1256 return (error);
1257 break;
1258
1259 case DIOCWLABEL:
1260 db1_printf(("DIOCWLABEL\n"));
1261 if (*(int *) data != 0)
1262 rs->sc_flags |= RAIDF_WLABEL;
1263 else
1264 rs->sc_flags &= ~RAIDF_WLABEL;
1265 break;
1266
1267 case DIOCGDEFLABEL:
1268 db1_printf(("DIOCGDEFLABEL\n"));
1269 raidgetdefaultlabel(raidPtrs[unit], rs,
1270 (struct disklabel *) data);
1271 break;
1272
1273 default:
1274 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1275 }
1276 return (retcode);
1277
1278 }
1279
1280
1281 /* raidinit -- complete the rest of the initialization for the
1282 RAIDframe device. */
1283
1284
1285 static int
1286 raidinit(dev, raidPtr, unit)
1287 dev_t dev;
1288 RF_Raid_t *raidPtr;
1289 int unit;
1290 {
1291 int retcode;
1292 /* int ix; */
1293 /* struct raidbuf *raidbp; */
1294 struct raid_softc *rs;
1295
1296 retcode = 0;
1297
1298 rs = &raid_softc[unit];
1299 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1300 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1301
1302
1303 /* XXX should check return code first... */
1304 rs->sc_flags |= RAIDF_INITED;
1305
1306 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1307
1308 rs->sc_dkdev.dk_name = rs->sc_xname;
1309
1310 /* disk_attach actually creates space for the CPU disklabel, among
1311 * other things, so it's critical to call this *BEFORE* we try putzing
1312 * with disklabels. */
1313
1314 disk_attach(&rs->sc_dkdev);
1315
1316 /* XXX There may be a weird interaction here between this, and
1317 * protectedSectors, as used in RAIDframe. */
1318
1319 rs->sc_size = raidPtr->totalSectors;
1320 rs->sc_dev = dev;
1321
1322 return (retcode);
1323 }
1324
1325 /*
1326 * This kernel thread never exits. It is created once, and persists
1327 * until the system reboots.
1328 */
1329
1330 void
1331 rf_ReconKernelThread()
1332 {
1333 struct rf_recon_req *req;
1334 int s;
1335
1336 /* XXX not sure what spl() level we should be at here... probably
1337 * splbio() */
1338 s = splbio();
1339
1340 while (1) {
1341 /* grab the next reconstruction request from the queue */
1342 LOCK_RECON_Q_MUTEX();
1343 while (!recon_queue) {
1344 UNLOCK_RECON_Q_MUTEX();
1345 tsleep(&recon_queue, PRIBIO,
1346 "raidframe recon", 0);
1347 LOCK_RECON_Q_MUTEX();
1348 }
1349 req = recon_queue;
1350 recon_queue = recon_queue->next;
1351 UNLOCK_RECON_Q_MUTEX();
1352
1353 /*
1354 * If flags specifies that we should start recon, this call
1355 * will not return until reconstruction completes, fails,
1356 * or is aborted.
1357 */
1358 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
1359 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
1360
1361 RF_Free(req, sizeof(*req));
1362 }
1363 }
1364 /* wake up the daemon & tell it to get us a spare table
1365 * XXX
1366 * the entries in the queues should be tagged with the raidPtr
1367 * so that in the extremely rare case that two recons happen at once,
1368 * we know for which device were requesting a spare table
1369 * XXX
1370 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* Queue our request and poke the sparetable daemon, which is
	 * blocked waiting on rf_sparet_wait_queue. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* NOTE(review): tsleep() does *not* drop rf_sparet_wait_mutex the
	 * way the #if 0'd mpsleep() did.  If these mutex macros ever map
	 * to a real blocking lock this sleeps while holding it -- confirm
	 * they are no-ops / spl-based in this configuration. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	/* Dequeue the daemon's response.  Note that from here on, req
	 * points at the response entry, not the caller's request. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The spare table installation status is passed back in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1401 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1402 * bp & passes it down.
1403 * any calls originating in the kernel must use non-blocking I/O
1404 * do some extra sanity checking to return "appropriate" error values for
1405 * certain conditions (to make some standard utilities work)
1406 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void (*cbFunc) (struct buf *);
	void *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		/* Translate partition-relative to device-absolute by
		 * adding the partition's offset from the disklabel. */
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
			pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	/* pb is 1 if the request ends partway through a sector. */
	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	/* NB: the "1 ||" forces this debug output on unconditionally. */
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			(int) raid_addr, (int) sum, (int) num_blocks,
			(int) pb, (int) bp->b_resid));
	}
	/* Reject I/O past the end of the array; the extra (sum < x)
	 * comparisons catch arithmetic wrap-around of the sum. */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* Reject transfers that are not a whole number of sectors. */
	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));

	/*
	 * XXX For now, all writes are sync
	 */
	do_async = 1;
	if ((bp->b_flags & B_READ) == 0)
		do_async = 0;

	/* don't ever condition on bp->b_flags & B_WRITE. always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
		bp->b_data, (int) bp->b_resid));
#endif

	/*
	 * If we requested sync I/O, sleep here.
	 */
	if ((retcode == 0) && (do_async == 0))
		tsleep(bp, PRIBIO, "raidsyncio", 0);

	return (retcode);
}
1507 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1508
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* Get a per-I/O wrapper buf from the unit's pool; it carries the
	 * original bp and the request through to KernelWakeupFunc(). */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* Complete immediately: no real I/O is issued for a NOP. */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* Fill in the wrapper buf and aim it at the component's
		 * vnode/device; KernelWakeupFunc runs at biodone time. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* Writes in flight must be accounted on the vnode. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
1621 /* this is the callback function associated with a I/O invoked from
1622 kernel code.
1623 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the struct raidbuf whose first member is the buf
	 * that just completed; recover the wrapper by casting. */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* The original (caller-level) buf that this I/O was part of. */
	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* Propagate a component-level error up to the original buf. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* Account this physical I/O in the access trace record, if any. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* Return the wrapper buf to the unit's pool. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* Tell the disk queue, then the DAG engine, that this I/O is done;
	 * the second argument is 1 on failure, 0 on success. */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1722
1723
1724
1725 /*
1726 * initialize a buf structure for doing an I/O in the kernel.
1727 */
1728 static void
1729 InitBP(
1730 struct buf * bp,
1731 struct vnode * b_vp,
1732 unsigned rw_flag,
1733 dev_t dev,
1734 RF_SectorNum_t startSect,
1735 RF_SectorCount_t numSect,
1736 caddr_t buf,
1737 void (*cbFunc) (struct buf *),
1738 void *cbArg,
1739 int logBytesPerSector,
1740 struct proc * b_proc)
1741 {
1742 /* bp->b_flags = B_PHYS | rw_flag; */
1743 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1744 bp->b_bcount = numSect << logBytesPerSector;
1745 bp->b_bufsize = bp->b_bcount;
1746 bp->b_error = 0;
1747 bp->b_dev = dev;
1748 db1_printf(("bp->b_dev is %d\n", dev));
1749 bp->b_un.b_addr = buf;
1750 #if 0
1751 db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1752 #endif
1753
1754 bp->b_blkno = startSect;
1755 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1756 db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1757 if (bp->b_bcount == 0) {
1758 panic("bp->b_bcount is zero in InitBP!!\n");
1759 }
1760 bp->b_proc = b_proc;
1761 bp->b_iodone = cbFunc;
1762 bp->b_vp = b_vp;
1763
1764 }
1765 /* Extras... */
1766
unsigned int
rpcc()
{
	/* Stub.  Presumably meant to read the CPU cycle counter, but
	 * there is no portable way to do so here; report zero. */
	return (0);
}
#if 0
/*
 * Dead code: an earlier placeholder version of
 * rf_GetSpareTableFromDaemon().  The live implementation appears
 * earlier in this file; this copy is compiled out and kept only for
 * reference.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1787
1788 static void
1789 raidgetdefaultlabel(raidPtr, rs, lp)
1790 RF_Raid_t *raidPtr;
1791 struct raid_softc *rs;
1792 struct disklabel *lp;
1793 {
1794 db1_printf(("Building a default label...\n"));
1795 bzero(lp, sizeof(*lp));
1796
1797 /* fabricate a label... */
1798 lp->d_secperunit = raidPtr->totalSectors;
1799 lp->d_secsize = raidPtr->bytesPerSector;
1800 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1801 lp->d_ntracks = 1;
1802 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1803 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1804
1805 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1806 lp->d_type = DTYPE_RAID;
1807 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1808 lp->d_rpm = 3600;
1809 lp->d_interleave = 1;
1810 lp->d_flags = 0;
1811
1812 lp->d_partitions[RAW_PART].p_offset = 0;
1813 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1814 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1815 lp->d_npartitions = RAW_PART + 1;
1816
1817 lp->d_magic = DISKMAGIC;
1818 lp->d_magic2 = DISKMAGIC;
1819 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1820
1821 }
1822 /*
1823 * Read the disklabel from the raid device. If one is not present, fake one
1824 * up.
1825 */
1826 static void
1827 raidgetdisklabel(dev)
1828 dev_t dev;
1829 {
1830 int unit = raidunit(dev);
1831 struct raid_softc *rs = &raid_softc[unit];
1832 char *errstring;
1833 struct disklabel *lp = rs->sc_dkdev.dk_label;
1834 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
1835 RF_Raid_t *raidPtr;
1836
1837 db1_printf(("Getting the disklabel...\n"));
1838
1839 bzero(clp, sizeof(*clp));
1840
1841 raidPtr = raidPtrs[unit];
1842
1843 raidgetdefaultlabel(raidPtr, rs, lp);
1844
1845 /*
1846 * Call the generic disklabel extraction routine.
1847 */
1848 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
1849 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1850 if (errstring)
1851 raidmakedisklabel(rs);
1852 else {
1853 int i;
1854 struct partition *pp;
1855
1856 /*
1857 * Sanity check whether the found disklabel is valid.
1858 *
1859 * This is necessary since total size of the raid device
1860 * may vary when an interleave is changed even though exactly
1861 * same componets are used, and old disklabel may used
1862 * if that is found.
1863 */
1864 if (lp->d_secperunit != rs->sc_size)
1865 printf("WARNING: %s: "
1866 "total sector size in disklabel (%d) != "
1867 "the size of raid (%ld)\n", rs->sc_xname,
1868 lp->d_secperunit, (long) rs->sc_size);
1869 for (i = 0; i < lp->d_npartitions; i++) {
1870 pp = &lp->d_partitions[i];
1871 if (pp->p_offset + pp->p_size > rs->sc_size)
1872 printf("WARNING: %s: end of partition `%c' "
1873 "exceeds the size of raid (%ld)\n",
1874 rs->sc_xname, 'a' + i, (long) rs->sc_size);
1875 }
1876 }
1877
1878 }
1879 /*
1880 * Take care of things one might want to take care of in the event
1881 * that a disklabel isn't present.
1882 */
1883 static void
1884 raidmakedisklabel(rs)
1885 struct raid_softc *rs;
1886 {
1887 struct disklabel *lp = rs->sc_dkdev.dk_label;
1888 db1_printf(("Making a label..\n"));
1889
1890 /*
1891 * For historical reasons, if there's no disklabel present
1892 * the raw partition must be marked FS_BSDFFS.
1893 */
1894
1895 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1896
1897 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1898
1899 lp->d_checksum = dkcksum(lp);
1900 }
1901 /*
1902 * Lookup the provided name in the filesystem. If the file exists,
1903 * is a valid block device, and isn't being used by anyone else,
1904 * set *vpp to the file's vnode.
1905 * You'll find the original of this in ccd.c
1906 */
1907 int
1908 raidlookup(path, p, vpp)
1909 char *path;
1910 struct proc *p;
1911 struct vnode **vpp; /* result */
1912 {
1913 struct nameidata nd;
1914 struct vnode *vp;
1915 struct vattr va;
1916 int error;
1917
1918 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1919 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1920 #ifdef DEBUG
1921 printf("RAIDframe: vn_open returned %d\n", error);
1922 #endif
1923 return (error);
1924 }
1925 vp = nd.ni_vp;
1926 if (vp->v_usecount > 1) {
1927 VOP_UNLOCK(vp, 0);
1928 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1929 return (EBUSY);
1930 }
1931 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1932 VOP_UNLOCK(vp, 0);
1933 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1934 return (error);
1935 }
1936 /* XXX: eventually we should handle VREG, too. */
1937 if (va.va_type != VBLK) {
1938 VOP_UNLOCK(vp, 0);
1939 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1940 return (ENOTBLK);
1941 }
1942 VOP_UNLOCK(vp, 0);
1943 *vpp = vp;
1944 return (0);
1945 }
1946 /*
1947 * Wait interruptibly for an exclusive lock.
1948 *
1949 * XXX
1950 * Several drivers do this; it should be abstracted and made MP-safe.
1951 * (Hmm... where have we seen this warning before :-> GO )
1952 */
1953 static int
1954 raidlock(rs)
1955 struct raid_softc *rs;
1956 {
1957 int error;
1958
1959 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1960 rs->sc_flags |= RAIDF_WANTED;
1961 if ((error =
1962 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1963 return (error);
1964 }
1965 rs->sc_flags |= RAIDF_LOCKED;
1966 return (0);
1967 }
1968 /*
1969 * Unlock and wake up any waiters.
1970 */
1971 static void
1972 raidunlock(rs)
1973 struct raid_softc *rs;
1974 {
1975
1976 rs->sc_flags &= ~RAIDF_LOCKED;
1977 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1978 rs->sc_flags &= ~RAIDF_WANTED;
1979 wakeup(rs);
1980 }
1981 }
1982
1983
1984 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1985 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1986
1987 int
1988 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1989 {
1990 RF_ComponentLabel_t component_label;
1991 raidread_component_label(dev, b_vp, &component_label);
1992 component_label.mod_counter = mod_counter;
1993 component_label.clean = RF_RAID_CLEAN;
1994 raidwrite_component_label(dev, b_vp, &component_label);
1995 return(0);
1996 }
1997
1998
1999 int
2000 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2001 {
2002 RF_ComponentLabel_t component_label;
2003 raidread_component_label(dev, b_vp, &component_label);
2004 component_label.mod_counter = mod_counter;
2005 component_label.clean = RF_RAID_DIRTY;
2006 raidwrite_component_label(dev, b_vp, &component_label);
2007 return(0);
2008 }
2009
2010 /* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Issue the read synchronously through the component's block
	 * device strategy routine and wait for it to finish. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		/* The label sits at the front of the info area. */
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* Throw the scratch buffer away rather than caching it. */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2060 /* ARGSUSED */
2061 int
2062 raidwrite_component_label(dev, b_vp, component_label)
2063 dev_t dev;
2064 struct vnode *b_vp;
2065 RF_ComponentLabel_t *component_label;
2066 {
2067 struct buf *bp;
2068 int error;
2069
2070 /* get a block of the appropriate size... */
2071 bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2072 bp->b_dev = dev;
2073
2074 /* get our ducks in a row for the write */
2075 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2076 bp->b_bcount = RF_COMPONENT_INFO_SIZE;
2077 bp->b_flags = B_BUSY | B_WRITE;
2078 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2079
2080 memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );
2081
2082 memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
2083
2084 (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
2085 error = biowait(bp);
2086 bp->b_flags = B_INVAL | B_AGE;
2087 brelse(bp);
2088 if (error) {
2089 printf("Failed to write RAID component info!\n");
2090 }
2091
2092 return(error);
2093 }
2094
/* rf_markalldirty: bump the array's mod counter and mark the component
 * label of every non-failed component dirty on disk.  Called when the
 * array goes active so a crash can be detected at next configuration. */
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			/* Failed components are skipped entirely. */
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
	/* The block below (spare handling) is compiled out.  NOTE(review):
	 * as written it also reads the loop variable r after the loops
	 * above have finished, so it would need fixing before enabling. */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2189
2190
2191 void
2192 rf_update_component_labels( raidPtr )
2193 RF_Raid_t *raidPtr;
2194 {
2195 RF_ComponentLabel_t c_label;
2196 int sparecol;
2197 int r,c;
2198 int i,j;
2199 int srow, scol;
2200
2201 srow = -1;
2202 scol = -1;
2203
2204 /* XXX should do extra checks to make sure things really are clean,
2205 rather than blindly setting the clean bit... */
2206
2207 raidPtr->mod_counter++;
2208
2209 for (r = 0; r < raidPtr->numRow; r++) {
2210 for (c = 0; c < raidPtr->numCol; c++) {
2211 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2212 raidread_component_label(
2213 raidPtr->Disks[r][c].dev,
2214 raidPtr->raid_cinfo[r][c].ci_vp,
2215 &c_label);
2216 /* make sure status is noted */
2217 c_label.status = rf_ds_optimal;
2218 raidwrite_component_label(
2219 raidPtr->Disks[r][c].dev,
2220 raidPtr->raid_cinfo[r][c].ci_vp,
2221 &c_label);
2222 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2223 raidmarkclean(
2224 raidPtr->Disks[r][c].dev,
2225 raidPtr->raid_cinfo[r][c].ci_vp,
2226 raidPtr->mod_counter);
2227 }
2228 }
2229 /* else we don't touch it.. */
2230 #if 0
2231 else if (raidPtr->Disks[r][c].status !=
2232 rf_ds_failed) {
2233 raidread_component_label(
2234 raidPtr->Disks[r][c].dev,
2235 raidPtr->raid_cinfo[r][c].ci_vp,
2236 &c_label);
2237 /* make sure status is noted */
2238 c_label.status =
2239 raidPtr->Disks[r][c].status;
2240 raidwrite_component_label(
2241 raidPtr->Disks[r][c].dev,
2242 raidPtr->raid_cinfo[r][c].ci_vp,
2243 &c_label);
2244 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2245 raidmarkclean(
2246 raidPtr->Disks[r][c].dev,
2247 raidPtr->raid_cinfo[r][c].ci_vp,
2248 raidPtr->mod_counter);
2249 }
2250 }
2251 #endif
2252 }
2253 }
2254
2255 for( c = 0; c < raidPtr->numSpare ; c++) {
2256 sparecol = raidPtr->numCol + c;
2257 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2258 /*
2259
2260 we claim this disk is "optimal" if it's
2261 rf_ds_used_spare, as that means it should be
2262 directly substitutable for the disk it replaced.
2263 We note that too...
2264
2265 */
2266
2267 for(i=0;i<raidPtr->numRow;i++) {
2268 for(j=0;j<raidPtr->numCol;j++) {
2269 if ((raidPtr->Disks[i][j].spareRow ==
2270 0) &&
2271 (raidPtr->Disks[i][j].spareCol ==
2272 sparecol)) {
2273 srow = i;
2274 scol = j;
2275 break;
2276 }
2277 }
2278 }
2279
2280 raidread_component_label(
2281 raidPtr->Disks[0][sparecol].dev,
2282 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2283 &c_label);
2284 /* make sure status is noted */
2285 c_label.version = RF_COMPONENT_LABEL_VERSION;
2286 c_label.mod_counter = raidPtr->mod_counter;
2287 c_label.serial_number = raidPtr->serial_number;
2288 c_label.row = srow;
2289 c_label.column = scol;
2290 c_label.num_rows = raidPtr->numRow;
2291 c_label.num_columns = raidPtr->numCol;
2292 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2293 c_label.status = rf_ds_optimal;
2294 raidwrite_component_label(
2295 raidPtr->Disks[0][sparecol].dev,
2296 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2297 &c_label);
2298 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2299 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2300 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2301 raidPtr->mod_counter);
2302 }
2303 }
2304 }
2305 /* printf("Component labels updated\n"); */
2306 }
2307