/*	$NetBSD: rf_netbsdkintf.c,v 1.10 1999/02/11 01:23:32 oster Exp $	*/
/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */




/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/types.h>
#include <machine/types.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/lock.h>
#include <sys/buf.h>
#include <sys/user.h>

#include "raid.h"
#include "rf_raid.h"
#include "rf_raidframe.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_diskqueue.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_debugMem.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_debugprint.h"
#include "rf_threadstuff.h"

int     rf_kdebug_level = 0;

#define RFK_BOOT_NONE 0
#define RFK_BOOT_GOOD 1
#define RFK_BOOT_BAD  2
static int rf_kbooted = RFK_BOOT_NONE;

#ifdef DEBUG
#define db0_printf(a) printf a
#define db_printf(a)  if (rf_kdebug_level > 0) printf a
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#define db2_printf(a) if (rf_kdebug_level > 1) printf a
#define db3_printf(a) if (rf_kdebug_level > 2) printf a
#define db4_printf(a) if (rf_kdebug_level > 3) printf a
#define db5_printf(a) if (rf_kdebug_level > 4) printf a
#else				/* DEBUG */
#define db0_printf(a) printf a
#define db1_printf(a) { }
#define db2_printf(a) { }
#define db3_printf(a) { }
#define db4_printf(a) { }
#define db5_printf(a) { }
#endif				/* DEBUG */
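
/*
 * Usage note: the db*_printf() macros take a single, parenthesized
 * printf()-style argument list, e.g.
 *
 *	db1_printf(("unit %d partition %d\n", unit, part));
 *
 * The double parentheses let a full variadic argument list pass through
 * the one macro parameter, since these pre-C99 macros can't be variadic.
 */
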
177
178 static RF_Raid_t **raidPtrs; /* global raid device descriptors */
179
180 static int rf_pending_testaccs;
181
182 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
183 RF_DECLARE_STATIC_MUTEX(rf_async_done_q_mutex)
184 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
185 * spare table */
186 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
187 * installation process */
188 static struct rf_test_acc *rf_async_done_qh, *rf_async_done_qt;
189
190 static struct rf_recon_req *recon_queue = NULL; /* used to communicate
191 * reconstruction
192 * requests */
193
194
195 decl_simple_lock_data(, recon_queue_mutex)
196 #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
197 #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
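
/*
 * How the recon queue is used (summary of the code below): the
 * RAIDFRAME_FAIL_DISK ioctl links a copied request onto recon_queue and
 * does a wakeup(&recon_queue); rf_ReconKernelThread() sleeps on that same
 * address, dequeues requests, and runs rf_FailDisk() for each.  All queue
 * manipulation happens under recon_queue_mutex.
 */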

/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
    dev_t dev, RF_SectorNum_t startSect,
    RF_SectorCount_t numSect, caddr_t buf,
    void (*cbFunc) (struct buf *), void *cbArg,
    int logBytesPerSector, struct proc * b_proc);

#define Dprintf0(s)       if (rf_queueDebug) rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf1(s,a)     if (rf_queueDebug) rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b)   if (rf_queueDebug) rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c) if (rf_queueDebug) rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)


/* this is so that we can compile under 2.0 as well as 3.2 */
#ifndef proc_to_task
#define proc_to_task(x) ((x)->task)
#endif				/* !proc_to_task */

void raidattach __P((int));
int  raidsize __P((dev_t));

void rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
void rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
static int raidinit __P((dev_t, RF_Raid_t *, int));

int  raidopen __P((dev_t, int, int, struct proc *));
int  raidclose __P((dev_t, int, int, struct proc *));
int  raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
int  raidwrite __P((dev_t, struct uio *, int));
int  raidread __P((dev_t, struct uio *, int));
void raidstrategy __P((struct buf *));
int  raiddump __P((dev_t, daddr_t, caddr_t, size_t));

/*
 * Pilfered from ccd.c
 */

struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	int     rf_flags;	/* misc. flags */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};
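
/*
 * Keeping rf_buf first is what allows KernelWakeupFunc() below to cast
 * the struct buf * it receives back to a struct raidbuf * and recover
 * rf_obp and req: the two structures share the same starting address.
 */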


#define RAIDGETBUF(rs)      pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)

/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that... */

struct raid_softc {
	int     sc_unit;	/* logical unit number */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	size_t  sc_size;	/* size of the raid device */
	dev_t   sc_dev;		/* our device.. */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct pool sc_cbufpool;	/* component buffer pool */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define raidunit(x)	DISKUNIT(x)
static int numraid = 0;

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
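
/*
 * RAIDLABELDEV() names the raw partition of the same unit; the
 * readdisklabel()/writedisklabel() calls below go through that dev_t, so
 * label I/O bypasses the per-partition bounds check in raidstrategy().
 */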

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
    struct disklabel *));
static void raidgetdisklabel __P((dev_t));
static void raidmakedisklabel __P((struct raid_softc *));

static int raidlock __P((struct raid_softc *));
static void raidunlock __P((struct raid_softc *));
int raidlookup __P((char *, struct proc * p, struct vnode **));


void
raidattach(num)
	int     num;
{
	int     raidID;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	/* Make some space for requested number of units... */

	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!\n");
	}
	rf_kbooted = rf_boot();
	if (rf_kbooted) {
		panic("Serious error booting RAID!!\n");
	}
	rf_kbooted = RFK_BOOT_GOOD;

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
	    M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}
	numraid = num;
	bzero(raid_softc, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
		    (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			printf("raidPtrs[%d] is NULL\n", raidID);
		}
	}
}
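
/*
 * Note: raidattach() only boots the RAIDframe core and allocates per-unit
 * state; no RAID set exists until a RAIDFRAME_CONFIGURE ioctl runs
 * rf_Configure() and raidinit() on a particular unit (see raidioctl()).
 */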

int
raidsize(dev)
	dev_t   dev;
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, unit, omask, size;

	unit = raidunit(dev);
	if (unit >= numraid)
		return (-1);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
		return (-1);

	return (size);

}

int
raiddump(dev, blkno, va, size)
	dev_t   dev;
	daddr_t blkno;
	caddr_t va;
	size_t  size;
{
	/* Not implemented. */
	return ENXIO;
}
/* ARGSUSED */
int
raidopen(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	unsigned int raidID;
	int     rc;
	int     error = 0;

	/* This whole next chunk of code is somewhat suspect... Not sure it's
	 * needed here at all... XXX */

	if (rf_kbooted == RFK_BOOT_NONE) {
		printf("Doing restart on raidopen.\n");
		rf_kbooted = RFK_BOOT_GOOD;
		rc = rf_boot();
		if (rc) {
			rf_kbooted = RFK_BOOT_BAD;
			printf("Someone is unhappy...\n");
			return (rc);
		}
	}
	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	raidID = raidunit(dev);

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(("Opening raid device number: %d partition: %d\n",
	    raidID, part));


	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
int
raidclose(dev, flags, fmt, p)
	dev_t   dev;
	int     flags, fmt;
	struct proc *p;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);
	return (0);

}

void
raidstrategy(bp)
	register struct buf *bp;
{
	register int s;

	unsigned int raidID = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	struct raid_softc *rs = &raid_softc[raidID];
	struct disklabel *lp;
	int     wlabel;

#if 0
	db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
	db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
	db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
	db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));

	if (bp->b_flags & B_READ)
		db1_printf(("READ\n"));
	else
		db1_printf(("WRITE\n"));
#endif
	if (rf_kbooted != RFK_BOOT_GOOD) {
		/* Never return a buffer without biodone()ing it, or the
		 * caller will wait on it forever. */
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	if (raidID >= numraid || !raidPtrs[raidID]) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	raidPtr = raidPtrs[raidID];
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		biodone(bp);
		return;
	}
	lp = rs->sc_dkdev.dk_label;

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) != RAW_PART)
		if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
			    (int) bp->b_blkno, (int) wlabel));
			biodone(bp);
			return;
		}
	s = splbio();		/* XXX Needed? */
	db1_printf(("Beginning strategy...\n"));

	bp->b_resid = 0;
	bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
	    NULL, NULL, NULL);
	if (bp->b_error) {
		bp->b_flags |= B_ERROR;
		db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
		    bp->b_error));
	}
	splx(s);
#if 0
	db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
	    bp, bp->b_data,
	    (int) bp->b_bcount, (int) bp->b_resid));
#endif
}
/* ARGSUSED */
int
raidread(dev, uio, flags)
	dev_t   dev;
	struct uio *uio;
	int     flags;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     result;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);
	part = DISKPART(dev);

	db1_printf(("raidread: unit: %d partition: %d\n", unit, part));

#if 0
	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
#endif
	result = physio(raidstrategy, NULL, dev, B_READ, minphys, uio);
	db1_printf(("raidread done.  Result is %d %d\n",
	    result, uio->uio_resid));
	return (result);

}
/* ARGSUSED */
int
raidwrite(dev, uio, flags)
	dev_t   dev;
	struct uio *uio;
	int     flags;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);
	db1_printf(("raidwrite\n"));
	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));


}

int
raidioctl(dev, cmd, data, flag, p)
	dev_t   dev;
	u_long  cmd;
	caddr_t data;
	int     flag;
	struct proc *p;
{
	int     unit = raidunit(dev);
	int     error = 0;
	int     part, pmask;
	struct raid_softc *rs;
#if 0
	int     r, c;
#endif
	/* struct raid_ioctl *ccio = (struct ccd_ioctl *)data; */

	/* struct ccdbuf *cbp; */
	/* struct raidbuf *raidbp; */
	RF_Config_t *k_cfg, *u_cfg;
	u_char *specific_buf;
	int     retcode = 0;

	int     row;
	struct rf_recon_req *rrcopy, *rr;
#if 0
	int     nbytes, spl, rw, row;
	struct rf_test_acc *ta;
	struct buf *bp;
	RF_SparetWait_t *waitreq;
	struct rf_test_acc *ta_p, *ta_copy;
#endif

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, (int) cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECKRECON:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {


		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */
		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			db3_printf(("rf_ioctl: ENOMEM for config.  Code is %d\n", retcode));
			return (ENOMEM);
		}
		retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
		    sizeof(RF_Config_t));
		if (retcode) {
			/* don't leak the config we just allocated */
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
			    retcode));
			return (retcode);
		}
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific,
			    (caddr_t) specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				/* free both allocations before bailing */
				RF_Free(specific_buf,
				    k_cfg->layoutSpecificSize);
				RF_Free(k_cfg, sizeof(RF_Config_t));
				db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
				    retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

#if 0
		db1_printf(("Considering configuring the system.:%d 0x%x\n",
		    unit, p));
#endif

		/* We need the pointer to this a little deeper, so stash it
		 * here... */

		raidPtrs[unit]->proc = p;

		/* configure the system */
		rf_pending_testaccs = 0;


		raidPtrs[unit]->raidid = unit;
		retcode = rf_Configure(raidPtrs[unit], k_cfg);


		if (retcode == 0) {
			retcode = raidinit(dev, raidPtrs[unit], unit);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
		    retcode));
		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		if ((error = raidlock(rs)) != 0)
			return (error);

		/*
		 * If somebody has a partition mounted, we shouldn't
		 * shutdown.
		 */

		part = DISKPART(dev);
		pmask = (1 << part);
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask))) {
			raidunlock(rs);
			return (EBUSY);
		}
		/* the intention here was to disallow shutdowns while
		 * raidframe is mounted, but it doesn't work because the
		 * shutdown ioctl calls rf_open */
		if (rf_pending_testaccs > 0) {
			printf("RAIDFRAME: Can't shutdown because there are %d pending test accs\n",
			    rf_pending_testaccs);
			/* drop the lock taken above before bailing */
			raidunlock(rs);
			return (EINVAL);
		}
		if (rf_debugKernelAccess) {
			printf("call shutdown\n");
		}
		raidPtrs[unit]->proc = p;	/* XXX necessary evil */
		retcode = rf_Shutdown(raidPtrs[unit]);

		db1_printf(("Done main shutdown\n"));

		pool_destroy(&rs->sc_cbufpool);
		db1_printf(("Done freeing component buffer freelist\n"));

		/* It's no longer initialized... */
		rs->sc_flags &= ~RAIDF_INITED;

		/* Detach the disk. */
		disk_detach(&rs->sc_dkdev);

		raidunlock(rs);

		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtrs[unit]->Layout.map->faultsTolerated == 0)
			return (EINVAL);
		/* borrow the thread of the requesting process */
		raidPtrs[unit]->proc = p;	/* Blah... :-p GO */
		retcode = rf_RewriteParity(raidPtrs[unit]);
		/* return I/O Error if the parity rewrite fails */

		if (retcode)
			retcode = EIO;
		return (retcode);

		/* issue a test-unit-ready through raidframe to the indicated
		 * device */
#if 0				/* XXX not supported yet (ever?) */
	case RAIDFRAME_TUR:
		/* debug only */
		retcode = rf_SCSI_DoTUR(0, 0, 0, 0, *(dev_t *) data);
		return (retcode);
#endif
	case RAIDFRAME_GET_INFO:
		{
			RF_Raid_t *raid = raidPtrs[unit];
			RF_DeviceConfig_t *cfg, **ucfgp;
			int     i, j, d;

			if (!raid->valid)
				return (ENODEV);
			ucfgp = (RF_DeviceConfig_t **) data;
			RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
			    (RF_DeviceConfig_t *));
			if (cfg == NULL)
				return (ENOMEM);
			bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
			cfg->rows = raid->numRow;
			cfg->cols = raid->numCol;
			cfg->ndevs = raid->numRow * raid->numCol;
			if (cfg->ndevs >= RF_MAX_DISKS) {
				cfg->ndevs = 0;
				/* don't leak cfg on the error path */
				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
				return (ENOMEM);
			}
			cfg->nspares = raid->numSpare;
			if (cfg->nspares >= RF_MAX_DISKS) {
				cfg->nspares = 0;
				RF_Free(cfg, sizeof(RF_DeviceConfig_t));
				return (ENOMEM);
			}
			cfg->maxqdepth = raid->maxQueueDepth;
			d = 0;
			for (i = 0; i < cfg->rows; i++) {
				for (j = 0; j < cfg->cols; j++) {
					cfg->devs[d] = raid->Disks[i][j];
					d++;
				}
			}
			for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
				cfg->spares[i] = raid->Disks[0][j];
			}
			retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
			    sizeof(RF_DeviceConfig_t));
			RF_Free(cfg, sizeof(RF_DeviceConfig_t));

			return (retcode);
		}
		break;

	case RAIDFRAME_RESET_ACCTOTALS:
		{
			RF_Raid_t *raid = raidPtrs[unit];

			bzero(&raid->acc_totals, sizeof(raid->acc_totals));
			return (0);
		}
		break;

	case RAIDFRAME_GET_ACCTOTALS:
		{
			RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
			RF_Raid_t *raid = raidPtrs[unit];

			*totals = raid->acc_totals;
			return (0);
		}
		break;

	case RAIDFRAME_KEEP_ACCTOTALS:
		{
			RF_Raid_t *raid = raidPtrs[unit];
			int    *keep = (int *) data;

			raid->keep_acc_totals = *keep;
			return (0);
		}
		break;

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtrs[unit]->totalSectors;
		return (0);

#define RAIDFRAME_RECON 1
		/* XXX The above should probably be set somewhere else!! GO */
#if RAIDFRAME_RECON > 0

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:
		rr = (struct rf_recon_req *) data;

		if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
		    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
			return (EINVAL);

		printf("Failing the disk: row: %d col: %d\n", rr->row, rr->col);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		bcopy(rr, rrcopy, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtrs[unit];

		LOCK_RECON_Q_MUTEX();
		rrcopy->next = recon_queue;
		recon_queue = rrcopy;
		wakeup(&recon_queue);
		UNLOCK_RECON_Q_MUTEX();

		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:
		/* borrow the current thread to get this done */
		raidPtrs[unit]->proc = p;	/* ICK.. but needed :-p GO */
		rf_CopybackReconstructedData(raidPtrs[unit]);
		return (0);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECKRECON:
		row = *(int *) data;
		if (row < 0 || row >= raidPtrs[unit]->numRow)
			return (EINVAL);
		if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
			*(int *) data = 100;
		else
			*(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
		return (0);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table.  this ioctl does not return until a
		 * spare table is needed.  XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH,
			    "sparet wait", 0,
			    (void *) simple_lock_addr(rf_sparet_wait_mutex),
			    MS_LOCK_SIMPLE);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		*((RF_SparetWait_t *) data) = *waitreq;	/* structure assignment */

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);


		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
		wakeup(&rf_sparet_wait_queue);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		wakeup(&rf_sparet_resp_queue);
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		return (retcode);
#endif


#endif				/* RAIDFRAME_RECON > 0 */

	default:
		break;		/* fall through to the os-specific code below */

	}

	if (!raidPtrs[unit]->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGDINFO:
		db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
		break;

	case DIOCGPART:
		db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
		((struct partinfo *) data)->part =
		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
		db1_printf(("DIOCWDINFO\n"));
		/* FALLTHROUGH */
	case DIOCSDINFO:
		db1_printf(("DIOCSDINFO\n"));
		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO)
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;

	case DIOCWLABEL:
		db1_printf(("DIOCWLABEL\n"));
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		db1_printf(("DIOCGDEFLABEL\n"));
		raidgetdefaultlabel(raidPtrs[unit], rs,
		    (struct disklabel *) data);
		break;

	default:
		retcode = ENOTTY;	/* XXXX ?? OR EINVAL ? */
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */


static int
raidinit(dev, raidPtr, unit)
	dev_t   dev;
	RF_Raid_t *raidPtr;
	int     unit;
{
	int     retcode;
	/* int ix; */
	/* struct raidbuf *raidbp; */
	struct raid_softc *rs;

	retcode = 0;

	rs = &raid_softc[unit];
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	sprintf(rs->sc_xname, "raid%d", unit);	/* XXX doesn't check bounds. */

	rs->sc_dkdev.dk_name = rs->sc_xname;
	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */
	rs->sc_size = raidPtr->totalSectors;
	rs->sc_dev = dev;
	return (retcode);
}


/*********************************************************
 *
 * initialization code called at boot time (startup.c)
 *
 ********************************************************/
int
rf_boot()
{
	int     i, rc;

	rc = rf_mutex_init(&rf_sparet_wait_mutex);
	if (rc) {
		RF_PANIC();
	}
	rc = rf_mutex_init(&rf_async_done_q_mutex);
	if (rc) {
		RF_PANIC();
	}
	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
	recon_queue = NULL;
	rf_async_done_qh = rf_async_done_qt = NULL;
	for (i = 0; i < numraid; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		rf_kbooted = RFK_BOOT_BAD;
	return (rc);
}
/*
 * This kernel thread never exits.  It is created once, and persists
 * until the system reboots.
 */
void
rf_ReconKernelThread()
{
	struct rf_recon_req *req;
	int     s;

	/* XXX not sure what spl() level we should be at here... probably
	 * splbio() */
	s = splbio();

	while (1) {
		/* grab the next reconstruction request from the queue */
		LOCK_RECON_Q_MUTEX();
		while (!recon_queue) {
			UNLOCK_RECON_Q_MUTEX();
			tsleep(&recon_queue, PRIBIO | PCATCH,
			    "raidframe recon", 0);
			LOCK_RECON_Q_MUTEX();
		}
		req = recon_queue;
		recon_queue = recon_queue->next;
		UNLOCK_RECON_Q_MUTEX();

		/*
		 * If flags specifies that we should start recon, this call
		 * will not return until reconstruction completes, fails,
		 * or is aborted.
		 */
		rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

		RF_Free(req, sizeof(*req));
	}
}
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr so that in
 * the extremely rare case that two recons happen at once, we know which
 * device we're requesting a spare table for
 * XXX
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO | PCATCH,
		    "raidframe getsparetable", 0);
#if 0
		mpsleep(&rf_sparet_resp_queue, PZERO, "sparet resp", 0,
		    (void *) simple_lock_addr(rf_sparet_wait_mutex),
		    MS_LOCK_SIMPLE);
#endif
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
/* a wrapper around rf_DoAccess that extracts the appropriate info from
 * the bp and passes it down.
 * Any calls originating in the kernel must use non-blocking I/O.  We do
 * some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work).
 */
int
rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
	RF_Raid_t *raidPtr;
	struct buf *bp;
	RF_RaidAccessFlags_t flags;
	void    (*cbFunc) (struct buf *);
	void   *cbArg;
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int     retcode;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;

	/* XXX The dev_t used here should be for /dev/[r]raid* !!! */

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Ok, for the bp we have here, bp->b_blkno is relative to the
	 * partition.. Need to make it absolute to the underlying device.. */

	blocknum = bp->b_blkno;
	if (DISKPART(bp->b_dev) != RAW_PART) {
		pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blocknum += pp->p_offset;
		db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
		    pp->p_offset));
	} else {
		db1_printf(("Is raw..\n"));
	}
	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to... but DO NOT
	 * TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
		    (int) raid_addr, (int) sum, (int) num_blocks,
		    (int) pb, (int) bp->b_resid));
	}
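	/*
	 * Range check: pb accounts for a trailing partial sector, and the
	 * "sum < ..." comparisons below catch the case where the unsigned
	 * addition above wrapped around, not just a transfer that runs off
	 * the end of the array.
	 */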
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		bp->b_error = ENOSPC;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	if (bp->b_bcount & raidPtr->sectorMask) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (bp->b_error);
	}
	db1_printf(("Calling DoAccess..\n"));

	/*
	 * XXX For now, all writes are sync
	 */
	do_async = 1;
	if ((bp->b_flags & B_READ) == 0)
		do_async = 0;

	/* don't ever condition on bp->b_flags & B_WRITE.  always condition on
	 * B_READ instead */
	retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
	    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
	    do_async, raid_addr, num_blocks,
	    bp->b_un.b_addr,
	    bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
	    NULL, cbFunc, cbArg);
#if 0
	db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
	    bp->b_data, (int) bp->b_resid));
#endif

	/*
	 * If we requested sync I/O, sleep here.
	 */
	if ((retcode == 0) && (do_async == 0))
		tsleep(bp, PRIBIO, "raidsyncio", 0);

	return (retcode);
}
/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(queue, req)
	RF_DiskQueue_t *queue;
	RF_DiskQueueData_t *req;
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int     unit;

	/* XXX along with the vnode, we also need the softc associated with
	 * this device.. */

	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO\n");
	}
	rs = &raid_softc[unit];

	/* XXX is this the right place? */
	disk_busy(&rs->sc_dkdev);

	bp = req->bp;

	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!!  Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0;	/* XXX not really used anywhere... */

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
		 * queue->row, queue->col); */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf("WAKEUP CALLED\n");
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this?? */

		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:

		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
			    (long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;
		/* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
		 * req->type, queue->row, queue->col); */

		db1_printf(("Going for %c to unit %d row %d col %d\n",
		    req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
		    (int) req->sectorOffset, (int) req->numSector,
		    (int) (req->numSector <<
			queue->raidPtr->logBytesPerSector),
		    (int) queue->raidPtr->logBytesPerSector));
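		/*
		 * For writes, bump the vnode's output counter before
		 * handing the buf down; biodone() on the component will do
		 * the matching vwakeup() accounting (the usual 4.4BSD
		 * convention for writes issued through a vnode).
		 */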
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(&raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(vbp)
	struct buf *vbp;
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	struct raidbuf *raidbp = (struct raidbuf *) vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int     unit;
	register int s;

	s = splbio();		/* XXX */
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;
#if 0
	db1_printf(("bp=0x%x\n", bp));
#endif

	queue = (RF_DiskQueue_t *) req->queue;

	if (raidbp->rf_buf.b_flags & B_ERROR) {
#if 0
		printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
#endif
		bp->b_flags |= B_ERROR;
		bp->b_error = raidbp->rf_buf.b_error ?
		    raidbp->rf_buf.b_error : EIO;
	}
#if 0
	db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
	db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
	db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
	db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
#endif

	/* XXX methinks this could be wrong... */
#if 1
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
	bp->b_bcount = raidbp->rf_buf.b_bcount;	/* XXXX ?? */

	unit = queue->raidPtr->raidid;	/* *Much* simpler :-> */


	/* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed... */
#if 1
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}
#endif

	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);


	if (bp->b_resid == 0) {
		db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
		    unit, bp->b_resid, bp->b_bcount));
		/* XXX is this the right place for a disk_unbusy()??!??!?!? */
		disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
	} else {
		db1_printf(("b_resid is still %ld\n", bp->b_resid));
	}

	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
	/* printf("Exiting KernelWakeupFunc\n"); */

	splx(s);		/* XXX */
}



/*
 * initialize a buf structure for doing an I/O in the kernel.
 */
static void
InitBP(
    struct buf * bp,
    struct vnode * b_vp,
    unsigned rw_flag,
    dev_t dev,
    RF_SectorNum_t startSect,
    RF_SectorCount_t numSect,
    caddr_t buf,
    void (*cbFunc) (struct buf *),
    void *cbArg,
    int logBytesPerSector,
    struct proc * b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = B_CALL | rw_flag;	/* XXX need B_PHYS here too??? */
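	/* B_CALL means biodone() will invoke bp->b_iodone (set to cbFunc
	 * below) when this buf completes, which is how KernelWakeupFunc()
	 * gets run for component I/O. */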
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	db1_printf(("bp->b_dev is %d\n", dev));
	bp->b_un.b_addr = buf;
#if 0
	db1_printf(("bp->b_data=0x%x\n", bp->b_data));
#endif

	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!\n");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_vp = b_vp;

}
/* Extras... */

unsigned int
rpcc()
{
	/* XXX no clue what this is supposed to do.. my guess is that it's
	 * supposed to read the CPU cycle counter... */
	/* db1_printf("this is supposed to do something useful too!??\n"); */
	return (0);
}
#if 0
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	int     retcode = 1;
	printf("This is supposed to do something useful!!\n");	/* XXX */

	return (retcode);

}
#endif

static void
raidgetdefaultlabel(raidPtr, rs, lp)
	RF_Raid_t *raidPtr;
	struct raid_softc *rs;
	struct disklabel *lp;
{
	db1_printf(("Building a default label...\n"));
	bzero(lp, sizeof(*lp));

	/* fabricate a label... */
	lp->d_secperunit = raidPtr->totalSectors;
	lp->d_secsize = raidPtr->bytesPerSector;
	lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
	lp->d_ntracks = 1;
	lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
	lp->d_type = DTYPE_RAID;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);	/* checksum the label we just built,
					 * not whatever happens to be in
					 * dk_label */

}
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev)
	dev_t   dev;
{
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	char   *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	bzero(clp, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;
		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since the total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * the same components are used, and an old disklabel may be
		 * used if one is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%d)\n", rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%d)\n",
				    rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present.
 */
static void
raidmakedisklabel(rs)
	struct raid_softc *rs;
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	lp->d_checksum = dkcksum(lp);
}
/*
 * Lookup the provided name in the filesystem.  If the file exists,
 * is a valid block device, and isn't being used by anyone else,
 * set *vpp to the file's vnode.
 * You'll find the original of this in ccd.c
 */
int
raidlookup(path, p, vpp)
	char   *path;
	struct proc *p;
	struct vnode **vpp;	/* result */
{
	struct nameidata nd;
	struct vnode *vp;
	struct vattr va;
	int     error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
#ifdef DEBUG
		printf("RAIDframe: vn_open returned %d\n", error);
#endif
		return (error);
	}
	vp = nd.ni_vp;
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENOTBLK);
	}
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm... where have we seen this warning before :-> GO )
 */
static int
raidlock(rs)
	struct raid_softc *rs;
{
	int     error;

	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		rs->sc_flags |= RAIDF_WANTED;
		if ((error =
			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
			return (error);
	}
	rs->sc_flags |= RAIDF_LOCKED;
	return (0);
}
/*
 * Unlock and wake up any waiters.
 */
static void
raidunlock(rs)
	struct raid_softc *rs;
{

	rs->sc_flags &= ~RAIDF_LOCKED;
	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
		rs->sc_flags &= ~RAIDF_WANTED;
		wakeup(rs);
	}
}