rf_netbsdkintf.c revision 1.18 1 /* $NetBSD: rf_netbsdkintf.c,v 1.18 1999/04/12 19:39:59 oster Exp $ */
2 /*-
3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Greg Oster; Jason R. Thorpe.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the NetBSD
20 * Foundation, Inc. and its contributors.
21 * 4. Neither the name of The NetBSD Foundation nor the names of its
22 * contributors may be used to endorse or promote products derived
23 * from this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*
39 * Copyright (c) 1988 University of Utah.
40 * Copyright (c) 1990, 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * the Systems Programming Group of the University of Utah Computer
45 * Science Department.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: Utah $Hdr: cd.c 1.6 90/11/28$
76 *
77 * @(#)cd.c 8.2 (Berkeley) 11/16/93
78 */
79
80
81
82
83 /*
84 * Copyright (c) 1995 Carnegie-Mellon University.
85 * All rights reserved.
86 *
87 * Authors: Mark Holland, Jim Zelenka
88 *
89 * Permission to use, copy, modify and distribute this software and
90 * its documentation is hereby granted, provided that both the copyright
91 * notice and this permission notice appear in all copies of the
92 * software, derivative works or modified versions, and any portions
93 * thereof, and that both notices appear in supporting documentation.
94 *
95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98 *
99 * Carnegie Mellon requests users of this software to return to
100 *
101 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
102 * School of Computer Science
103 * Carnegie Mellon University
104 * Pittsburgh PA 15213-3890
105 *
106 * any improvements or extensions that they make and grant Carnegie the
107 * rights to redistribute these changes.
108 */
109
110 /***********************************************************
111 *
112 * rf_kintf.c -- the kernel interface routines for RAIDframe
113 *
114 ***********************************************************/
115
116 #include <sys/errno.h>
117 #include <sys/param.h>
118 #include <sys/pool.h>
119 #include <sys/queue.h>
120 #include <sys/disk.h>
121 #include <sys/device.h>
122 #include <sys/stat.h>
123 #include <sys/ioctl.h>
124 #include <sys/fcntl.h>
125 #include <sys/systm.h>
126 #include <sys/namei.h>
127 #include <sys/vnode.h>
128 #include <sys/param.h>
129 #include <sys/types.h>
130 #include <machine/types.h>
131 #include <sys/disklabel.h>
132 #include <sys/conf.h>
133 #include <sys/lock.h>
134 #include <sys/buf.h>
135 #include <sys/user.h>
136
137 #include "raid.h"
138 #include "rf_raid.h"
139 #include "rf_raidframe.h"
140 #include "rf_dag.h"
141 #include "rf_dagflags.h"
142 #include "rf_diskqueue.h"
143 #include "rf_acctrace.h"
144 #include "rf_etimer.h"
145 #include "rf_general.h"
146 #include "rf_debugMem.h"
147 #include "rf_kintf.h"
148 #include "rf_options.h"
149 #include "rf_driver.h"
150 #include "rf_parityscan.h"
151 #include "rf_debugprint.h"
152 #include "rf_threadstuff.h"
153
/* Run-time debug verbosity knob for the dbN_printf() macros below;
 * only consulted when the driver is compiled with DEBUG. */
int rf_kdebug_level = 0;

/* States for rf_kbooted: has rf_BootRaidframe() run, and did it succeed? */
#define RFK_BOOT_NONE 0
#define RFK_BOOT_GOOD 1
#define RFK_BOOT_BAD 2
static int rf_kbooted = RFK_BOOT_NONE;

#ifdef DEBUG
/* dbN_printf(a): print `a' when rf_kdebug_level exceeds N-1.
 * Note db_printf and db1_printf are equivalent (both level > 0). */
#define db0_printf(a) printf a
#define db_printf(a) if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else				/* DEBUG */
/* Non-DEBUG kernels: level 0 always prints, everything else compiles away.
 * Arguments of db1..db5 are *not* evaluated in this case. */
#define db0_printf(a) printf a
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif				/* DEBUG */
177
/* One RF_Raid_t per configured unit; allocated in raidattach(). */
static RF_Raid_t **raidPtrs;	/* global raid device descriptors */

/* Protects the two sparet queues below. */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

static struct rf_recon_req *recon_queue = NULL;	/* used to communicate
						 * reconstruction
						 * requests */


/* Serializes access to recon_queue. */
decl_simple_lock_data(, recon_queue_mutex)
#define LOCK_RECON_Q_MUTEX()   simple_lock(&recon_queue_mutex)
#define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)

/* prototypes */
/* I/O completion callback handed to InitBP() for component I/O. */
static void KernelWakeupFunc(struct buf * bp);
/* Fill in a struct buf describing one component I/O request. */
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    dev_t dev, RF_SectorNum_t startSect,
    RF_SectorCount_t numSect, caddr_t buf,
    void (*cbFunc) (struct buf *), void *cbArg,
    int logBytesPerSector, struct proc * b_proc);

/* DprintfN: queue-debugging printfs, gated on rf_queueDebug. */
#define Dprintf0(s) if (rf_queueDebug) \
     rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf1(s,a) if (rf_queueDebug) \
     rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b) if (rf_queueDebug) \
     rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c) if (rf_queueDebug) \
     rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)

/* Mark a component label clean/dirty on the given component device. */
int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);

/* Block/character device entry points (cdevsw/bdevsw). */
void raidattach __P((int));
int raidsize __P((dev_t));

void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
static int raidinit __P((dev_t, RF_Raid_t *, int));

int raidopen __P((dev_t, int, int, struct proc *));
int raidclose __P((dev_t, int, int, struct proc *));
int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int raidwrite __P((dev_t, struct uio *, int));
int raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int raiddump __P((dev_t, daddr_t, caddr_t, size_t));

/* Component-label I/O and bulk label refresh. */
int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
void rf_update_component_labels( RF_Raid_t *);
234 /*
235 * Pilfered from ccd.c
236 */
237
238 struct raidbuf {
239 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
240 struct buf *rf_obp; /* ptr. to original I/O buf */
241 int rf_flags; /* misc. flags */
242 RF_DiskQueueData_t *req;/* the request that this was part of.. */
243 };
244
245
246 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
247 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
248
/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */

/* Per-unit soft state, parallel to raidPtrs[]; indexed by unit number. */
struct raid_softc {
	int sc_flags;		/* flags (RAIDF_* below) */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	dev_t sc_dev;		/* our device.. */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)
/* Number of configured units; set once in raidattach(). */
static int numraid = 0;

/* Device number of the raw partition of this unit's label device. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
				     struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));
/* Resolve a component pathname to a vnode. */
int raidlookup __P((char *, struct proc * p, struct vnode **));

/* Mark every component label of the set dirty (set at first open). */
static void rf_markalldirty __P((RF_Raid_t *));
287
288 void
289 raidattach(num)
290 int num;
291 {
292 int raidID;
293 int i, rc;
294
295 #ifdef DEBUG
296 printf("raidattach: Asked for %d units\n", num);
297 #endif
298
299 if (num <= 0) {
300 #ifdef DIAGNOSTIC
301 panic("raidattach: count <= 0");
302 #endif
303 return;
304 }
305 /* This is where all the initialization stuff gets done. */
306
307 /* Make some space for requested number of units... */
308
309 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
310 if (raidPtrs == NULL) {
311 panic("raidPtrs is NULL!!\n");
312 }
313
314 rc = rf_mutex_init(&rf_sparet_wait_mutex);
315 if (rc) {
316 RF_PANIC();
317 }
318
319 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
320 recon_queue = NULL;
321
322 for (i = 0; i < numraid; i++)
323 raidPtrs[i] = NULL;
324 rc = rf_BootRaidframe();
325 if (rc == 0)
326 printf("Kernelized RAIDframe activated\n");
327 else
328 panic("Serious error booting RAID!!\n");
329
330 rf_kbooted = RFK_BOOT_GOOD;
331
332 /* put together some datastructures like the CCD device does.. This
333 * lets us lock the device and what-not when it gets opened. */
334
335 raid_softc = (struct raid_softc *)
336 malloc(num * sizeof(struct raid_softc),
337 M_RAIDFRAME, M_NOWAIT);
338 if (raid_softc == NULL) {
339 printf("WARNING: no memory for RAIDframe driver\n");
340 return;
341 }
342 numraid = num;
343 bzero(raid_softc, num * sizeof(struct raid_softc));
344
345 for (raidID = 0; raidID < num; raidID++) {
346 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
347 (RF_Raid_t *));
348 if (raidPtrs[raidID] == NULL) {
349 printf("raidPtrs[%d] is NULL\n", raidID);
350 }
351 }
352 }
353
354
355 int
356 raidsize(dev)
357 dev_t dev;
358 {
359 struct raid_softc *rs;
360 struct disklabel *lp;
361 int part, unit, omask, size;
362
363 unit = raidunit(dev);
364 if (unit >= numraid)
365 return (-1);
366 rs = &raid_softc[unit];
367
368 if ((rs->sc_flags & RAIDF_INITED) == 0)
369 return (-1);
370
371 part = DISKPART(dev);
372 omask = rs->sc_dkdev.dk_openmask & (1 << part);
373 lp = rs->sc_dkdev.dk_label;
374
375 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
376 return (-1);
377
378 if (lp->d_partitions[part].p_fstype != FS_SWAP)
379 size = -1;
380 else
381 size = lp->d_partitions[part].p_size *
382 (lp->d_secsize / DEV_BSIZE);
383
384 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
385 return (-1);
386
387 return (size);
388
389 }
390
/*
 * raiddump: dump entry point.  Crash dumps to a RAID device are not
 * supported, so every call fails with ENXIO.
 */
int
raiddump(dev, blkno, va, size)
	dev_t dev;
	daddr_t blkno;
	caddr_t va;
	size_t size;
{
	/* Not implemented. */
	return (ENXIO);
}
/* ARGSUSED */
/*
 * raidopen: open entry point.
 *
 * dev   -- device being opened (encodes unit and partition)
 * flags -- open flags (unused)
 * fmt   -- S_IFCHR or S_IFBLK; selects which open mask to update
 * p     -- opening process (unused)
 *
 * Returns 0 on success or an errno value.  On the first open of a
 * configured unit, marks all component labels dirty so an unclean
 * shutdown can be detected later.
 */
int
raidopen(dev, flags, fmt, p)
	dev_t dev;
	int flags, fmt;
	struct proc *p;
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	/* Interlock against concurrent unconfigure/labelling. */
	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
		unit, part));


	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		/* Note: lp is only dereferenced when the unit is INITED,
		 * thanks to short-circuit evaluation below. */
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		 here... If so, we needn't do this, but then need some
		 other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	/* Recompute the combined open mask from both per-type masks. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
480 /* ARGSUSED */
481 int
482 raidclose(dev, flags, fmt, p)
483 dev_t dev;
484 int flags, fmt;
485 struct proc *p;
486 {
487 int unit = raidunit(dev);
488 struct raid_softc *rs;
489 int error = 0;
490 int part;
491
492 if (unit >= numraid)
493 return (ENXIO);
494 rs = &raid_softc[unit];
495
496 if ((error = raidlock(rs)) != 0)
497 return (error);
498
499 part = DISKPART(dev);
500
501 /* ...that much closer to allowing unconfiguration... */
502 switch (fmt) {
503 case S_IFCHR:
504 rs->sc_dkdev.dk_copenmask &= ~(1 << part);
505 break;
506
507 case S_IFBLK:
508 rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
509 break;
510 }
511 rs->sc_dkdev.dk_openmask =
512 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
513
514 if ((rs->sc_dkdev.dk_openmask == 0) &&
515 ((rs->sc_flags & RAIDF_INITED) != 0)) {
516 /* Last one... device is not unconfigured yet.
517 Device shutdown has taken care of setting the
518 clean bits if RAIDF_INITED is not set
519 mark things as clean... */
520 rf_update_component_labels( raidPtrs[unit] );
521 }
522
523 raidunlock(rs);
524 return (0);
525
526 }
527
528 void
529 raidstrategy(bp)
530 register struct buf *bp;
531 {
532 register int s;
533
534 unsigned int raidID = raidunit(bp->b_dev);
535 RF_Raid_t *raidPtr;
536 struct raid_softc *rs = &raid_softc[raidID];
537 struct disklabel *lp;
538 int wlabel;
539
540 #if 0
541 db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
542 db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
543 db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
544 db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
545 db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
546
547 if (bp->b_flags & B_READ)
548 db1_printf(("READ\n"));
549 else
550 db1_printf(("WRITE\n"));
551 #endif
552 if (rf_kbooted != RFK_BOOT_GOOD)
553 return;
554 if (raidID >= numraid || !raidPtrs[raidID]) {
555 bp->b_error = ENODEV;
556 bp->b_flags |= B_ERROR;
557 bp->b_resid = bp->b_bcount;
558 biodone(bp);
559 return;
560 }
561 raidPtr = raidPtrs[raidID];
562 if (!raidPtr->valid) {
563 bp->b_error = ENODEV;
564 bp->b_flags |= B_ERROR;
565 bp->b_resid = bp->b_bcount;
566 biodone(bp);
567 return;
568 }
569 if (bp->b_bcount == 0) {
570 db1_printf(("b_bcount is zero..\n"));
571 biodone(bp);
572 return;
573 }
574 lp = rs->sc_dkdev.dk_label;
575
576 /*
577 * Do bounds checking and adjust transfer. If there's an
578 * error, the bounds check will flag that for us.
579 */
580
581 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
582 if (DISKPART(bp->b_dev) != RAW_PART)
583 if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
584 db1_printf(("Bounds check failed!!:%d %d\n",
585 (int) bp->b_blkno, (int) wlabel));
586 biodone(bp);
587 return;
588 }
589 s = splbio(); /* XXX Needed? */
590 db1_printf(("Beginning strategy...\n"));
591
592 bp->b_resid = 0;
593 bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
594 NULL, NULL, NULL);
595 if (bp->b_error) {
596 bp->b_flags |= B_ERROR;
597 db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
598 bp->b_error));
599 }
600 splx(s);
601 #if 0
602 db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
603 bp, bp->b_data,
604 (int) bp->b_bcount, (int) bp->b_resid));
605 #endif
606 }
607 /* ARGSUSED */
608 int
609 raidread(dev, uio, flags)
610 dev_t dev;
611 struct uio *uio;
612 int flags;
613 {
614 int unit = raidunit(dev);
615 struct raid_softc *rs;
616 int part;
617
618 if (unit >= numraid)
619 return (ENXIO);
620 rs = &raid_softc[unit];
621
622 if ((rs->sc_flags & RAIDF_INITED) == 0)
623 return (ENXIO);
624 part = DISKPART(dev);
625
626 db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
627
628 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
629
630 }
631 /* ARGSUSED */
632 int
633 raidwrite(dev, uio, flags)
634 dev_t dev;
635 struct uio *uio;
636 int flags;
637 {
638 int unit = raidunit(dev);
639 struct raid_softc *rs;
640
641 if (unit >= numraid)
642 return (ENXIO);
643 rs = &raid_softc[unit];
644
645 if ((rs->sc_flags & RAIDF_INITED) == 0)
646 return (ENXIO);
647 db1_printf(("raidwrite\n"));
648 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
649
650 }
651
652 int
653 raidioctl(dev, cmd, data, flag, p)
654 dev_t dev;
655 u_long cmd;
656 caddr_t data;
657 int flag;
658 struct proc *p;
659 {
660 int unit = raidunit(dev);
661 int error = 0;
662 int part, pmask;
663 struct raid_softc *rs;
664 #if 0
665 int r, c;
666 #endif
667 /* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */
668
669 /* struct ccdbuf *cbp; */
670 /* struct raidbuf *raidbp; */
671 RF_Config_t *k_cfg, *u_cfg;
672 u_char *specific_buf;
673 int retcode = 0;
674 int row;
675 int column;
676 struct rf_recon_req *rrcopy, *rr;
677 RF_ComponentLabel_t *component_label;
678 RF_ComponentLabel_t ci_label;
679 RF_ComponentLabel_t **c_label_ptr;
680 RF_SingleComponent_t *sparePtr,*componentPtr;
681 RF_SingleComponent_t hot_spare;
682 RF_SingleComponent_t component;
683
684 if (unit >= numraid)
685 return (ENXIO);
686 rs = &raid_softc[unit];
687
688 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
689 (int) DISKPART(dev), (int) unit, (int) cmd));
690
691 /* Must be open for writes for these commands... */
692 switch (cmd) {
693 case DIOCSDINFO:
694 case DIOCWDINFO:
695 case DIOCWLABEL:
696 if ((flag & FWRITE) == 0)
697 return (EBADF);
698 }
699
700 /* Must be initialized for these... */
701 switch (cmd) {
702 case DIOCGDINFO:
703 case DIOCSDINFO:
704 case DIOCWDINFO:
705 case DIOCGPART:
706 case DIOCWLABEL:
707 case DIOCGDEFLABEL:
708 case RAIDFRAME_SHUTDOWN:
709 case RAIDFRAME_REWRITEPARITY:
710 case RAIDFRAME_GET_INFO:
711 case RAIDFRAME_RESET_ACCTOTALS:
712 case RAIDFRAME_GET_ACCTOTALS:
713 case RAIDFRAME_KEEP_ACCTOTALS:
714 case RAIDFRAME_GET_SIZE:
715 case RAIDFRAME_FAIL_DISK:
716 case RAIDFRAME_COPYBACK:
717 case RAIDFRAME_CHECKRECON:
718 case RAIDFRAME_GET_COMPONENT_LABEL:
719 case RAIDFRAME_SET_COMPONENT_LABEL:
720 case RAIDFRAME_ADD_HOT_SPARE:
721 case RAIDFRAME_REMOVE_HOT_SPARE:
722 case RAIDFRAME_INIT_LABELS:
723 case RAIDFRAME_REBUILD_IN_PLACE:
724 if ((rs->sc_flags & RAIDF_INITED) == 0)
725 return (ENXIO);
726 }
727
728 switch (cmd) {
729
730
731 /* configure the system */
732 case RAIDFRAME_CONFIGURE:
733
734 db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
735 /* copy-in the configuration information */
736 /* data points to a pointer to the configuration structure */
737 u_cfg = *((RF_Config_t **) data);
738 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
739 if (k_cfg == NULL) {
740 db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
741 return (ENOMEM);
742 }
743 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
744 sizeof(RF_Config_t));
745 if (retcode) {
746 db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
747 retcode));
748 return (retcode);
749 }
750 /* allocate a buffer for the layout-specific data, and copy it
751 * in */
752 if (k_cfg->layoutSpecificSize) {
753 if (k_cfg->layoutSpecificSize > 10000) {
754 /* sanity check */
755 db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
756 return (EINVAL);
757 }
758 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
759 (u_char *));
760 if (specific_buf == NULL) {
761 RF_Free(k_cfg, sizeof(RF_Config_t));
762 db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
763 return (ENOMEM);
764 }
765 retcode = copyin(k_cfg->layoutSpecific,
766 (caddr_t) specific_buf,
767 k_cfg->layoutSpecificSize);
768 if (retcode) {
769 db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
770 retcode));
771 return (retcode);
772 }
773 } else
774 specific_buf = NULL;
775 k_cfg->layoutSpecific = specific_buf;
776
777 /* should do some kind of sanity check on the configuration.
778 * Store the sum of all the bytes in the last byte? */
779
780 #if 0
781 db1_printf(("Considering configuring the system.:%d 0x%x\n",
782 unit, p));
783 #endif
784
785 /* We need the pointer to this a little deeper, so stash it
786 * here... */
787
788 raidPtrs[unit]->proc = p;
789
790 /* configure the system */
791
792 raidPtrs[unit]->raidid = unit;
793 retcode = rf_Configure(raidPtrs[unit], k_cfg);
794
795
796 if (retcode == 0) {
797 retcode = raidinit(dev, raidPtrs[unit], unit);
798 rf_markalldirty( raidPtrs[unit] );
799 }
800 /* free the buffers. No return code here. */
801 if (k_cfg->layoutSpecificSize) {
802 RF_Free(specific_buf, k_cfg->layoutSpecificSize);
803 }
804 RF_Free(k_cfg, sizeof(RF_Config_t));
805
806 db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
807 retcode));
808
809 return (retcode);
810
811 /* shutdown the system */
812 case RAIDFRAME_SHUTDOWN:
813
814 if ((error = raidlock(rs)) != 0)
815 return (error);
816
817 /*
818 * If somebody has a partition mounted, we shouldn't
819 * shutdown.
820 */
821
822 part = DISKPART(dev);
823 pmask = (1 << part);
824 if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
825 ((rs->sc_dkdev.dk_bopenmask & pmask) &&
826 (rs->sc_dkdev.dk_copenmask & pmask))) {
827 raidunlock(rs);
828 return (EBUSY);
829 }
830
831 if (rf_debugKernelAccess) {
832 printf("call shutdown\n");
833 }
834 raidPtrs[unit]->proc = p; /* XXX necessary evil */
835
836 retcode = rf_Shutdown(raidPtrs[unit]);
837
838 db1_printf(("Done main shutdown\n"));
839
840 pool_destroy(&rs->sc_cbufpool);
841 db1_printf(("Done freeing component buffer freelist\n"));
842
843 /* It's no longer initialized... */
844 rs->sc_flags &= ~RAIDF_INITED;
845
846 /* Detach the disk. */
847 disk_detach(&rs->sc_dkdev);
848
849 raidunlock(rs);
850
851 return (retcode);
852 case RAIDFRAME_GET_COMPONENT_LABEL:
853 c_label_ptr = (RF_ComponentLabel_t **) data;
854 /* need to read the component label for the disk indicated
855 by row,column in component_label
856 XXX need to sanity check these values!!!
857 */
858
859 /* For practice, let's get it directly fromdisk, rather
860 than from the in-core copy */
861 RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
862 (RF_ComponentLabel_t *));
863 if (component_label == NULL)
864 return (ENOMEM);
865
866 bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
867
868 retcode = copyin( *c_label_ptr, component_label,
869 sizeof(RF_ComponentLabel_t));
870
871 if (retcode) {
872 return(retcode);
873 }
874
875 row = component_label->row;
876 printf("Row: %d\n",row);
877 if (row > raidPtrs[unit]->numRow) {
878 row = 0; /* XXX */
879 }
880 column = component_label->column;
881 printf("Column: %d\n",column);
882 if (column > raidPtrs[unit]->numCol) {
883 column = 0; /* XXX */
884 }
885
886 raidread_component_label(
887 raidPtrs[unit]->Disks[row][column].dev,
888 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
889 component_label );
890
891 retcode = copyout((caddr_t) component_label,
892 (caddr_t) *c_label_ptr,
893 sizeof(RF_ComponentLabel_t));
894 RF_Free( component_label, sizeof(RF_ComponentLabel_t));
895 return (retcode);
896
897 case RAIDFRAME_SET_COMPONENT_LABEL:
898 component_label = (RF_ComponentLabel_t *) data;
899
900 /* XXX check the label for valid stuff... */
901 /* Note that some things *should not* get modified --
902 the user should be re-initing the labels instead of
903 trying to patch things.
904 */
905
906 printf("Got component label:\n");
907 printf("Version: %d\n",component_label->version);
908 printf("Serial Number: %d\n",component_label->serial_number);
909 printf("Mod counter: %d\n",component_label->mod_counter);
910 printf("Row: %d\n", component_label->row);
911 printf("Column: %d\n", component_label->column);
912 printf("Num Rows: %d\n", component_label->num_rows);
913 printf("Num Columns: %d\n", component_label->num_columns);
914 printf("Clean: %d\n", component_label->clean);
915 printf("Status: %d\n", component_label->status);
916
917 row = component_label->row;
918 column = component_label->column;
919
920 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
921 (column < 0) || (column > raidPtrs[unit]->numCol)) {
922 return(EINVAL);
923 }
924
925 /* XXX this isn't allowed to do anything for now :-) */
926 #if 0
927 raidwrite_component_label(
928 raidPtrs[unit]->Disks[row][column].dev,
929 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
930 component_label );
931 #endif
932 return (0);
933
934 case RAIDFRAME_INIT_LABELS:
935 component_label = (RF_ComponentLabel_t *) data;
936 /*
937 we only want the serial number from
938 the above. We get all the rest of the information
939 from the config that was used to create this RAID
940 set.
941 */
942
943 raidPtrs[unit]->serial_number = component_label->serial_number;
944 /* current version number */
945 ci_label.version = RF_COMPONENT_LABEL_VERSION;
946 ci_label.serial_number = component_label->serial_number;
947 ci_label.mod_counter = raidPtrs[unit]->mod_counter;
948 ci_label.num_rows = raidPtrs[unit]->numRow;
949 ci_label.num_columns = raidPtrs[unit]->numCol;
950 ci_label.clean = RF_RAID_DIRTY; /* not clean */
951 ci_label.status = rf_ds_optimal; /* "It's good!" */
952
953 for(row=0;row<raidPtrs[unit]->numRow;row++) {
954 ci_label.row = row;
955 for(column=0;column<raidPtrs[unit]->numCol;column++) {
956 ci_label.column = column;
957 raidwrite_component_label(
958 raidPtrs[unit]->Disks[row][column].dev,
959 raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
960 &ci_label );
961 }
962 }
963
964 return (retcode);
965
966 /* initialize all parity */
967 case RAIDFRAME_REWRITEPARITY:
968
969 if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
970 /* Parity for RAID 0 is trivially correct */
971 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
972 return(0);
973 }
974
975 /* borrow the thread of the requesting process */
976 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
977 retcode = rf_RewriteParity(raidPtrs[unit]);
978 /* return I/O Error if the parity rewrite fails */
979
980 if (retcode) {
981 retcode = EIO;
982 } else {
983 /* set the clean bit! If we shutdown correctly,
984 the clean bit on each component label will get
985 set */
986 raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
987 }
988 return (retcode);
989
990
991 case RAIDFRAME_ADD_HOT_SPARE:
992 sparePtr = (RF_SingleComponent_t *) data;
993 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
994 printf("Adding spare\n");
995 retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
996 return(retcode);
997
998 case RAIDFRAME_REMOVE_HOT_SPARE:
999 return(retcode);
1000
1001 case RAIDFRAME_REBUILD_IN_PLACE:
1002 componentPtr = (RF_SingleComponent_t *) data;
1003 memcpy( &component, componentPtr,
1004 sizeof(RF_SingleComponent_t));
1005 row = component.row;
1006 column = component.column;
1007 printf("Rebuild: %d %d\n",row, column);
1008 if ((row < 0) || (row > raidPtrs[unit]->numRow) ||
1009 (column < 0) || (column > raidPtrs[unit]->numCol)) {
1010 return(EINVAL);
1011 }
1012 printf("Attempting a rebuild in place\n");
1013 raidPtrs[unit]->proc = p; /* Blah... :-p GO */
1014 retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1015 return(retcode);
1016
1017 /* issue a test-unit-ready through raidframe to the indicated
1018 * device */
1019 #if 0 /* XXX not supported yet (ever?) */
1020 case RAIDFRAME_TUR:
1021 /* debug only */
1022 retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
1023 return (retcode);
1024 #endif
1025 case RAIDFRAME_GET_INFO:
1026 {
1027 RF_Raid_t *raid = raidPtrs[unit];
1028 RF_DeviceConfig_t *cfg, **ucfgp;
1029 int i, j, d;
1030
1031 if (!raid->valid)
1032 return (ENODEV);
1033 ucfgp = (RF_DeviceConfig_t **) data;
1034 RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1035 (RF_DeviceConfig_t *));
1036 if (cfg == NULL)
1037 return (ENOMEM);
1038 bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1039 cfg->rows = raid->numRow;
1040 cfg->cols = raid->numCol;
1041 cfg->ndevs = raid->numRow * raid->numCol;
1042 if (cfg->ndevs >= RF_MAX_DISKS) {
1043 cfg->ndevs = 0;
1044 return (ENOMEM);
1045 }
1046 cfg->nspares = raid->numSpare;
1047 if (cfg->nspares >= RF_MAX_DISKS) {
1048 cfg->nspares = 0;
1049 return (ENOMEM);
1050 }
1051 cfg->maxqdepth = raid->maxQueueDepth;
1052 d = 0;
1053 for (i = 0; i < cfg->rows; i++) {
1054 for (j = 0; j < cfg->cols; j++) {
1055 cfg->devs[d] = raid->Disks[i][j];
1056 d++;
1057 }
1058 }
1059 for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1060 cfg->spares[i] = raid->Disks[0][j];
1061 }
1062 retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1063 sizeof(RF_DeviceConfig_t));
1064 RF_Free(cfg, sizeof(RF_DeviceConfig_t));
1065
1066 return (retcode);
1067 }
1068 break;
1069
1070 case RAIDFRAME_RESET_ACCTOTALS:
1071 {
1072 RF_Raid_t *raid = raidPtrs[unit];
1073
1074 bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1075 return (0);
1076 }
1077 break;
1078
1079 case RAIDFRAME_GET_ACCTOTALS:
1080 {
1081 RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1082 RF_Raid_t *raid = raidPtrs[unit];
1083
1084 *totals = raid->acc_totals;
1085 return (0);
1086 }
1087 break;
1088
1089 case RAIDFRAME_KEEP_ACCTOTALS:
1090 {
1091 RF_Raid_t *raid = raidPtrs[unit];
1092 int *keep = (int *) data;
1093
1094 raid->keep_acc_totals = *keep;
1095 return (0);
1096 }
1097 break;
1098
1099 case RAIDFRAME_GET_SIZE:
1100 *(int *) data = raidPtrs[unit]->totalSectors;
1101 return (0);
1102
1103 #define RAIDFRAME_RECON 1
1104 /* XXX The above should probably be set somewhere else!! GO */
1105 #if RAIDFRAME_RECON > 0
1106
1107 /* fail a disk & optionally start reconstruction */
1108 case RAIDFRAME_FAIL_DISK:
1109 rr = (struct rf_recon_req *) data;
1110
1111 if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1112 || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1113 return (EINVAL);
1114
1115 printf("raid%d: Failing the disk: row: %d col: %d\n",
1116 unit, rr->row, rr->col);
1117
1118 /* make a copy of the recon request so that we don't rely on
1119 * the user's buffer */
1120 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1121 bcopy(rr, rrcopy, sizeof(*rr));
1122 rrcopy->raidPtr = (void *) raidPtrs[unit];
1123
1124 LOCK_RECON_Q_MUTEX();
1125 rrcopy->next = recon_queue;
1126 recon_queue = rrcopy;
1127 wakeup(&recon_queue);
1128 UNLOCK_RECON_Q_MUTEX();
1129
1130 return (0);
1131
1132 /* invoke a copyback operation after recon on whatever disk
1133 * needs it, if any */
1134 case RAIDFRAME_COPYBACK:
1135 /* borrow the current thread to get this done */
1136 raidPtrs[unit]->proc = p; /* ICK.. but needed :-p GO */
1137 rf_CopybackReconstructedData(raidPtrs[unit]);
1138 return (0);
1139
1140 /* return the percentage completion of reconstruction */
1141 case RAIDFRAME_CHECKRECON:
1142 row = *(int *) data;
1143 if (row < 0 || row >= raidPtrs[unit]->numRow)
1144 return (EINVAL);
1145 if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1146 *(int *) data = 100;
1147 else
1148 *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1149 return (0);
1150
1151 /* the sparetable daemon calls this to wait for the kernel to
1152 * need a spare table. this ioctl does not return until a
1153 * spare table is needed. XXX -- calling mpsleep here in the
1154 * ioctl code is almost certainly wrong and evil. -- XXX XXX
1155 * -- I should either compute the spare table in the kernel,
1156 * or have a different -- XXX XXX -- interface (a different
1157 * character device) for delivering the table -- XXX */
1158 #if 0
1159 case RAIDFRAME_SPARET_WAIT:
1160 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1161 while (!rf_sparet_wait_queue)
1162 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1163 waitreq = rf_sparet_wait_queue;
1164 rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1165 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1166
1167 *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
1168
1169 RF_Free(waitreq, sizeof(*waitreq));
1170 return (0);
1171
1172
1173 /* wakes up a process waiting on SPARET_WAIT and puts an error
1174 * code in it that will cause the dameon to exit */
1175 case RAIDFRAME_ABORT_SPARET_WAIT:
1176 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1177 waitreq->fcol = -1;
1178 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1179 waitreq->next = rf_sparet_wait_queue;
1180 rf_sparet_wait_queue = waitreq;
1181 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1182 wakeup(&rf_sparet_wait_queue);
1183 return (0);
1184
1185 /* used by the spare table daemon to deliver a spare table
1186 * into the kernel */
1187 case RAIDFRAME_SEND_SPARET:
1188
1189 /* install the spare table */
1190 retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
1191
1192 /* respond to the requestor. the return status of the spare
1193 * table installation is passed in the "fcol" field */
1194 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1195 waitreq->fcol = retcode;
1196 RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1197 waitreq->next = rf_sparet_resp_queue;
1198 rf_sparet_resp_queue = waitreq;
1199 wakeup(&rf_sparet_resp_queue);
1200 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1201
1202 return (retcode);
1203 #endif
1204
1205
1206 #endif /* RAIDFRAME_RECON > 0 */
1207
1208 default:
1209 break; /* fall through to the os-specific code below */
1210
1211 }
1212
1213 if (!raidPtrs[unit]->valid)
1214 return (EINVAL);
1215
1216 /*
1217 * Add support for "regular" device ioctls here.
1218 */
1219
1220 switch (cmd) {
1221 case DIOCGDINFO:
1222 db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
1223 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1224 break;
1225
1226 case DIOCGPART:
1227 db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
1228 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1229 ((struct partinfo *) data)->part =
1230 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1231 break;
1232
1233 case DIOCWDINFO:
1234 db1_printf(("DIOCWDINFO\n"));
1235 case DIOCSDINFO:
1236 db1_printf(("DIOCSDINFO\n"));
1237 if ((error = raidlock(rs)) != 0)
1238 return (error);
1239
1240 rs->sc_flags |= RAIDF_LABELLING;
1241
1242 error = setdisklabel(rs->sc_dkdev.dk_label,
1243 (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1244 if (error == 0) {
1245 if (cmd == DIOCWDINFO)
1246 error = writedisklabel(RAIDLABELDEV(dev),
1247 raidstrategy, rs->sc_dkdev.dk_label,
1248 rs->sc_dkdev.dk_cpulabel);
1249 }
1250 rs->sc_flags &= ~RAIDF_LABELLING;
1251
1252 raidunlock(rs);
1253
1254 if (error)
1255 return (error);
1256 break;
1257
1258 case DIOCWLABEL:
1259 db1_printf(("DIOCWLABEL\n"));
1260 if (*(int *) data != 0)
1261 rs->sc_flags |= RAIDF_WLABEL;
1262 else
1263 rs->sc_flags &= ~RAIDF_WLABEL;
1264 break;
1265
1266 case DIOCGDEFLABEL:
1267 db1_printf(("DIOCGDEFLABEL\n"));
1268 raidgetdefaultlabel(raidPtrs[unit], rs,
1269 (struct disklabel *) data);
1270 break;
1271
1272 default:
1273 retcode = ENOTTY; /* XXXX ?? OR EINVAL ? */
1274 }
1275 return (retcode);
1276
1277 }
1278
1279
1280 /* raidinit -- complete the rest of the initialization for the
1281 RAIDframe device. */
1282
1283
static int
raidinit(dev, raidPtr, unit)
	dev_t dev;
	RF_Raid_t *raidPtr;
	int unit;
{
	int retcode;
	/* int ix; */
	/* struct raidbuf *raidbp; */
	struct raid_softc *rs;

	retcode = 0;

	rs = &raid_softc[unit];
	/* Create the pool of shadow buffer headers used to track component
	 * I/O (allocated/released via RAIDGETBUF/RAIDPUTBUF). */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* device name, e.g. "raid0", used by the disk subsystem and in
	 * diagnostics */
	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	/* record the array size and backing dev_t for the disklabel code */
	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;

	return (retcode);
}
1323
1324 /*
1325 * This kernel thread never exits. It is created once, and persists
1326 * until the system reboots.
1327 */
1328
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();
	/* NOTE(review): this thread never exits, so splx(s) is never called;
	 * confirm the permanent splbio() level is intended. */

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			/* queue empty: drop the lock and wait for the
			 * wakeup() issued by the RAIDFRAME_FAIL_DISK path */
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO,
			    "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		/* pop the head of the singly-linked request list */
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		/* the request was malloc'd by the ioctl path; we own (and
		 * free) it here */
		RF_Free(req, sizeof(*req));
	}
}
1363 /* wake up the daemon & tell it to get us a spare table
1364 * XXX
1365 * the entries in the queues should be tagged with the raidPtr
1366 * so that in the extremely rare case that two recons happen at once,
1367 * we know for which device were requesting a spare table
1368 * XXX
1369 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode;

	/* post our request on the wait queue and wake the daemon blocked in
	 * RAIDFRAME_SPARET_WAIT */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* NOTE(review): the original OSF mpsleep() released the mutex while
	 * sleeping (see the #if 0 code below); tsleep() takes no lock
	 * argument, so RF_LOCK_MUTEX is held across the sleep here --
	 * confirm that is safe (or that the macro is a no-op) in this port. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	/* pop the daemon's response; its status travels in fcol */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1400 /* a wrapper around rf_DoAccess that extracts appropriate info from the
1401 * bp & passes it down.
1402 * any calls originating in the kernel must use non-blocking I/O
1403 * do some extra sanity checking to return "appropriate" error values for
1404 * certain conditions (to make some standard utilities work)
1405 */
1406 int
1407 rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
1408 RF_Raid_t *raidPtr;
1409 struct buf *bp;
1410 RF_RaidAccessFlags_t flags;
1411 void (*cbFunc) (struct buf *);
1412 void *cbArg;
1413 {
1414 RF_SectorCount_t num_blocks, pb, sum;
1415 RF_RaidAddr_t raid_addr;
1416 int retcode;
1417 struct partition *pp;
1418 daddr_t blocknum;
1419 int unit;
1420 struct raid_softc *rs;
1421 int do_async;
1422
1423 /* XXX The dev_t used here should be for /dev/[r]raid* !!! */
1424
1425 unit = raidPtr->raidid;
1426 rs = &raid_softc[unit];
1427
1428 /* Ok, for the bp we have here, bp->b_blkno is relative to the
1429 * partition.. Need to make it absolute to the underlying device.. */
1430
1431 blocknum = bp->b_blkno;
1432 if (DISKPART(bp->b_dev) != RAW_PART) {
1433 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1434 blocknum += pp->p_offset;
1435 db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
1436 pp->p_offset));
1437 } else {
1438 db1_printf(("Is raw..\n"));
1439 }
1440 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
1441
1442 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1443 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1444
1445 /* *THIS* is where we adjust what block we're going to... but DO NOT
1446 * TOUCH bp->b_blkno!!! */
1447 raid_addr = blocknum;
1448
1449 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1450 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1451 sum = raid_addr + num_blocks + pb;
1452 if (1 || rf_debugKernelAccess) {
1453 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1454 (int) raid_addr, (int) sum, (int) num_blocks,
1455 (int) pb, (int) bp->b_resid));
1456 }
1457 if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1458 || (sum < num_blocks) || (sum < pb)) {
1459 bp->b_error = ENOSPC;
1460 bp->b_flags |= B_ERROR;
1461 bp->b_resid = bp->b_bcount;
1462 biodone(bp);
1463 return (bp->b_error);
1464 }
1465 /*
1466 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1467 */
1468
1469 if (bp->b_bcount & raidPtr->sectorMask) {
1470 bp->b_error = EINVAL;
1471 bp->b_flags |= B_ERROR;
1472 bp->b_resid = bp->b_bcount;
1473 biodone(bp);
1474 return (bp->b_error);
1475 }
1476 db1_printf(("Calling DoAccess..\n"));
1477
1478 /*
1479 * XXX For now, all writes are sync
1480 */
1481 do_async = 1;
1482 if ((bp->b_flags & B_READ) == 0)
1483 do_async = 0;
1484
1485 /* don't ever condition on bp->b_flags & B_WRITE. always condition on
1486 * B_READ instead */
1487 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1488 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1489 do_async, raid_addr, num_blocks,
1490 bp->b_un.b_addr,
1491 bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
1492 NULL, cbFunc, cbArg);
1493 #if 0
1494 db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
1495 bp->b_data, (int) bp->b_resid));
1496 #endif
1497
1498 /*
1499 * If we requested sync I/O, sleep here.
1500 */
1501 if ((retcode == 0) && (do_async == 0))
1502 tsleep(bp, PRIBIO, "raidsyncio", 0);
1503
1504 return (retcode);
1505 }
1506 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1507
int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is. It's buried in RAIDframe somewhere) :-( GO ) */

	/* clear any stale error state left on the buffer before reuse */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* shadow buffer from the per-unit pool; returned to the pool in
	 * KernelWakeupFunc() */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		/* complete the NOP immediately via the normal callback */
		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			/* start timing the physical I/O */
			RF_ETIMER_START(req->tracerec->timer);
		}
		/* set up the shadow buf to target the component's vnode/dev;
		 * KernelWakeupFunc fires from biodone() on completion */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
			req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		/* writes must bump the vnode's output counter */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
1620 /* this is the callback function associated with a I/O invoked from
1621 kernel code.
1622 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the rf_buf embedded first in a struct raidbuf */
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	/* the original buffer handed to rf_DispatchKernelIO() */
	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	/* propagate any component I/O error to the original buffer */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* account this physical I/O in the access trace, if tracing */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			/* XXX here we should bump the version number for each component, and write that data out */
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	/* return the shadow buffer to the per-unit pool */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
			unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	/* let the queueing code and RAIDframe proper know we are done */
	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}
1721
1722
1723
1724 /*
1725 * initialize a buf structure for doing an I/O in the kernel.
1726 */
static void
InitBP(
    struct buf * bp,
    struct vnode * b_vp,
    unsigned rw_flag,
    dev_t dev,
    RF_SectorNum_t startSect,
    RF_SectorCount_t numSect,
    caddr_t buf,
    void (*cbFunc) (struct buf *),
    void *cbArg,
    int logBytesPerSector,
    struct proc * b_proc)
{
	/* B_CALL arranges for cbFunc (stored in b_iodone below) to be
	 * invoked from biodone() when the transfer completes.
	 * NOTE(review): the cbArg parameter is never stored here; the
	 * callback (KernelWakeupFunc) recovers its context from the
	 * enclosing struct raidbuf instead -- confirm cbArg is
	 * intentionally unused. */
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	db1_printf(("bp->b_dev is %d\n", dev));
	bp->b_un.b_addr = buf;
#if 0
	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
#endif

	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
	/* a zero-length transfer here indicates a caller bug */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!\n");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_vp = b_vp;

}
1764 /* Extras... */
1765
unsigned int
rpcc()
{
	/*
	 * Stub for the Alpha "read processor cycle counter" primitive
	 * that the original RAIDframe code used for timing.  There is no
	 * portable equivalent wired up here, so simply report zero.
	 */
	return (0);
}
#if 0
/*
 * NOTE(review): dead code -- a stale stub duplicate of
 * rf_GetSpareTableFromDaemon(); the live implementation is defined
 * earlier in this file.  Left disabled under #if 0.
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif
1786
1787 static void
1788 raidgetdefaultlabel(raidPtr, rs, lp)
1789 RF_Raid_t *raidPtr;
1790 struct raid_softc *rs;
1791 struct disklabel *lp;
1792 {
1793 db1_printf(("Building a default label...\n"));
1794 bzero(lp, sizeof(*lp));
1795
1796 /* fabricate a label... */
1797 lp->d_secperunit = raidPtr->totalSectors;
1798 lp->d_secsize = raidPtr->bytesPerSector;
1799 lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
1800 lp->d_ntracks = 1;
1801 lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
1802 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1803
1804 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1805 lp->d_type = DTYPE_RAID;
1806 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1807 lp->d_rpm = 3600;
1808 lp->d_interleave = 1;
1809 lp->d_flags = 0;
1810
1811 lp->d_partitions[RAW_PART].p_offset = 0;
1812 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
1813 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1814 lp->d_npartitions = RAW_PART + 1;
1815
1816 lp->d_magic = DISKMAGIC;
1817 lp->d_magic2 = DISKMAGIC;
1818 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
1819
1820 }
1821 /*
1822 * Read the disklabel from the raid device. If one is not present, fake one
1823 * up.
1824 */
static void
raidgetdisklabel(dev)
	dev_t dev;
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a fabricated label; readdisklabel() below overwrites
	 * it if a real one is found on the device */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		/* no on-disk label: dress up the fabricated one */
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same componets are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
1878 /*
1879 * Take care of things one might want to take care of in the event
1880 * that a disklabel isn't present.
1881 */
1882 static void
1883 raidmakedisklabel(rs)
1884 struct raid_softc *rs;
1885 {
1886 struct disklabel *lp = rs->sc_dkdev.dk_label;
1887 db1_printf(("Making a label..\n"));
1888
1889 /*
1890 * For historical reasons, if there's no disklabel present
1891 * the raw partition must be marked FS_BSDFFS.
1892 */
1893
1894 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1895
1896 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1897
1898 lp->d_checksum = dkcksum(lp);
1899 }
1900 /*
1901 * Lookup the provided name in the filesystem. If the file exists,
1902 * is a valid block device, and isn't being used by anyone else,
1903 * set *vpp to the file's vnode.
1904 * You'll find the original of this in ccd.c
1905 */
int
raidlookup(path, p, vpp)
	char *path;
	struct proc *p;
	struct vnode **vpp;	/* result */
{
	struct nameidata nd;
	struct vnode *vp;
	struct vattr va;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
#ifdef DEBUG
		printf("RAIDframe: vn_open returned %d\n", error);
#endif
		return (error);
	}
	/* vn_open() returns the vnode locked and referenced */
	vp = nd.ni_vp;
	/* refuse a component that is already open elsewhere */
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENOTBLK);
	}
	/* success: hand back an unlocked, still-referenced vnode */
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
1945 /*
1946 * Wait interruptibly for an exclusive lock.
1947 *
1948 * XXX
1949 * Several drivers do this; it should be abstracted and made MP-safe.
1950 * (Hmm... where have we seen this warning before :-> GO )
1951 */
1952 static int
1953 raidlock(rs)
1954 struct raid_softc *rs;
1955 {
1956 int error;
1957
1958 while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
1959 rs->sc_flags |= RAIDF_WANTED;
1960 if ((error =
1961 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1962 return (error);
1963 }
1964 rs->sc_flags |= RAIDF_LOCKED;
1965 return (0);
1966 }
1967 /*
1968 * Unlock and wake up any waiters.
1969 */
1970 static void
1971 raidunlock(rs)
1972 struct raid_softc *rs;
1973 {
1974
1975 rs->sc_flags &= ~RAIDF_LOCKED;
1976 if ((rs->sc_flags & RAIDF_WANTED) != 0) {
1977 rs->sc_flags &= ~RAIDF_WANTED;
1978 wakeup(rs);
1979 }
1980 }
1981
1982
1983 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
1984 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1985
1986 int
1987 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
1988 {
1989 RF_ComponentLabel_t component_label;
1990 raidread_component_label(dev, b_vp, &component_label);
1991 component_label.mod_counter = mod_counter;
1992 component_label.clean = RF_RAID_CLEAN;
1993 raidwrite_component_label(dev, b_vp, &component_label);
1994 return(0);
1995 }
1996
1997
1998 int
1999 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2000 {
2001 RF_ComponentLabel_t component_label;
2002 raidread_component_label(dev, b_vp, &component_label);
2003 component_label.mod_counter = mod_counter;
2004 component_label.clean = RF_RAID_DIRTY;
2005 raidwrite_component_label(dev, b_vp, &component_label);
2006 return(0);
2007 }
2008
2009 /* ARGSUSED */
int
raidread_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_READ;
	/* NOTE(review): b_resid is set in DEV_BSIZE units while b_bcount is
	 * in bytes -- confirm which unit the strategy routine expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* issue the read directly through the component's block device */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	error = biowait(bp);

	if (!error) {
		memcpy(component_label, bp->b_un.b_addr,
		    sizeof(RF_ComponentLabel_t));
#if 0
		printf("raidread_component_label: got component label:\n");
		printf("Version: %d\n",component_label->version);
		printf("Serial Number: %d\n",component_label->serial_number);
		printf("Mod counter: %d\n",component_label->mod_counter);
		printf("Row: %d\n", component_label->row);
		printf("Column: %d\n", component_label->column);
		printf("Num Rows: %d\n", component_label->num_rows);
		printf("Num Columns: %d\n", component_label->num_columns);
		printf("Clean: %d\n", component_label->clean);
		printf("Status: %d\n", component_label->status);
#endif
	} else {
		printf("Failed to read RAID component label!\n");
	}

	/* mark the buffer for immediate reuse and release it */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	return(error);
}
2059 /* ARGSUSED */
int
raidwrite_component_label(dev, b_vp, component_label)
	dev_t dev;
	struct vnode *b_vp;
	RF_ComponentLabel_t *component_label;
{
	struct buf *bp;
	int error;

	/* get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* get our ducks in a row for the write */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags = B_BUSY | B_WRITE;
	/* NOTE(review): b_resid is set in DEV_BSIZE units while b_bcount is
	 * in bytes -- confirm which unit the strategy routine expects. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* zero-fill the whole info area, then drop the label at the front */
	memset( bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE );

	memcpy( bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));

	/* issue the write directly through the component's block device */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	/* mark the buffer for immediate reuse and release it */
	bp->b_flags = B_INVAL | B_AGE;
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2093
void
rf_markalldirty( raidPtr )
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t c_label;
	int r,c;

	/* bump the array-wide modification counter, then stamp every
	 * non-failed component's label dirty with the new counter */
	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status != rf_ds_failed) {
				raidread_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
				if (c_label.status == rf_ds_spared) {
					/* XXX do something special...
					 but whatever you do, don't
					 try to access it!! */
				} else {
#if 0
				c_label.status =
					raidPtr->Disks[r][c].status;
				raidwrite_component_label(
					raidPtr->Disks[r][c].dev,
					raidPtr->raid_cinfo[r][c].ci_vp,
					&c_label);
#endif
				raidmarkdirty(
				       raidPtr->Disks[r][c].dev,
				       raidPtr->raid_cinfo[r][c].ci_vp,
				       raidPtr->mod_counter);
				}
			}
		}
	}
	/* printf("Component labels marked dirty.\n"); */
	/* NOTE(review): the disabled block below (spare-handling) references
	 * variables (sparecol, srow, scol, i, j) that are not declared in
	 * this function; it will not compile if simply re-enabled. */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*

			   XXX this is where we get fancy and map this spare
			   into it's correct spot in the array.

			 */
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					     r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					     sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			/* make sure status is noted */
			c_label.version = RF_COMPONENT_LABEL_VERSION;
			c_label.mod_counter = raidPtr->mod_counter;
			c_label.serial_number = raidPtr->serial_number;
			c_label.row = srow;
			c_label.column = scol;
			c_label.num_rows = raidPtr->numRow;
			c_label.num_columns = raidPtr->numCol;
			c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
			c_label.status = rf_ds_optimal;
			raidwrite_component_label(
				      raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp,
				      &c_label);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
				      raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2188
2189
2190 void
2191 rf_update_component_labels( raidPtr )
2192 RF_Raid_t *raidPtr;
2193 {
2194 RF_ComponentLabel_t c_label;
2195 int sparecol;
2196 int r,c;
2197 int i,j;
2198 int srow, scol;
2199
2200 srow = -1;
2201 scol = -1;
2202
2203 /* XXX should do extra checks to make sure things really are clean,
2204 rather than blindly setting the clean bit... */
2205
2206 raidPtr->mod_counter++;
2207
2208 for (r = 0; r < raidPtr->numRow; r++) {
2209 for (c = 0; c < raidPtr->numCol; c++) {
2210 if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2211 raidread_component_label(
2212 raidPtr->Disks[r][c].dev,
2213 raidPtr->raid_cinfo[r][c].ci_vp,
2214 &c_label);
2215 /* make sure status is noted */
2216 c_label.status = rf_ds_optimal;
2217 raidwrite_component_label(
2218 raidPtr->Disks[r][c].dev,
2219 raidPtr->raid_cinfo[r][c].ci_vp,
2220 &c_label);
2221 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2222 raidmarkclean(
2223 raidPtr->Disks[r][c].dev,
2224 raidPtr->raid_cinfo[r][c].ci_vp,
2225 raidPtr->mod_counter);
2226 }
2227 }
2228 /* else we don't touch it.. */
2229 #if 0
2230 else if (raidPtr->Disks[r][c].status !=
2231 rf_ds_failed) {
2232 raidread_component_label(
2233 raidPtr->Disks[r][c].dev,
2234 raidPtr->raid_cinfo[r][c].ci_vp,
2235 &c_label);
2236 /* make sure status is noted */
2237 c_label.status =
2238 raidPtr->Disks[r][c].status;
2239 raidwrite_component_label(
2240 raidPtr->Disks[r][c].dev,
2241 raidPtr->raid_cinfo[r][c].ci_vp,
2242 &c_label);
2243 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2244 raidmarkclean(
2245 raidPtr->Disks[r][c].dev,
2246 raidPtr->raid_cinfo[r][c].ci_vp,
2247 raidPtr->mod_counter);
2248 }
2249 }
2250 #endif
2251 }
2252 }
2253
2254 for( c = 0; c < raidPtr->numSpare ; c++) {
2255 sparecol = raidPtr->numCol + c;
2256 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
2257 /*
2258
2259 we claim this disk is "optimal" if it's
2260 rf_ds_used_spare, as that means it should be
2261 directly substitutable for the disk it replaced.
2262 We note that too...
2263
2264 */
2265
2266 for(i=0;i<raidPtr->numRow;i++) {
2267 for(j=0;j<raidPtr->numCol;j++) {
2268 if ((raidPtr->Disks[i][j].spareRow ==
2269 0) &&
2270 (raidPtr->Disks[i][j].spareCol ==
2271 sparecol)) {
2272 srow = i;
2273 scol = j;
2274 break;
2275 }
2276 }
2277 }
2278
2279 raidread_component_label(
2280 raidPtr->Disks[0][sparecol].dev,
2281 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2282 &c_label);
2283 /* make sure status is noted */
2284 c_label.version = RF_COMPONENT_LABEL_VERSION;
2285 c_label.mod_counter = raidPtr->mod_counter;
2286 c_label.serial_number = raidPtr->serial_number;
2287 c_label.row = srow;
2288 c_label.column = scol;
2289 c_label.num_rows = raidPtr->numRow;
2290 c_label.num_columns = raidPtr->numCol;
2291 c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
2292 c_label.status = rf_ds_optimal;
2293 raidwrite_component_label(
2294 raidPtr->Disks[0][sparecol].dev,
2295 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2296 &c_label);
2297 if (raidPtr->parity_good == RF_RAID_CLEAN) {
2298 raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2299 raidPtr->raid_cinfo[0][sparecol].ci_vp,
2300 raidPtr->mod_counter);
2301 }
2302 }
2303 }
2304 /* printf("Component labels updated\n"); */
2305 }
2306