udf_strat_rmw.c revision 1.9.4.5 1 /* $NetBSD: udf_strat_rmw.c,v 1.9.4.5 2008/12/10 22:18:20 snj Exp $ */
2
3 /*
4 * Copyright (c) 2006, 2008 Reinoud Zandijk
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 */
28
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.9.4.5 2008/12/10 22:18:20 snj Exp $");
32 #endif /* not lint */
33
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_quota.h"
37 #include "opt_compat_netbsd.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysctl.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <miscfs/genfs/genfs_node.h>
48 #include <sys/mount.h>
49 #include <sys/buf.h>
50 #include <sys/file.h>
51 #include <sys/device.h>
52 #include <sys/disklabel.h>
53 #include <sys/ioctl.h>
54 #include <sys/malloc.h>
55 #include <sys/dirent.h>
56 #include <sys/stat.h>
57 #include <sys/conf.h>
58 #include <sys/kauth.h>
59 #include <sys/kthread.h>
60 #include <dev/clock_subr.h>
61
62 #include <fs/udf/ecma167-udf.h>
63 #include <fs/udf/udf_mount.h>
64
65 #include "udf.h"
66 #include "udf_subr.h"
67 #include "udf_bswap.h"
68
69
/* Accessors for the private data hanging off vnodes, mounts and bufs. */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
#define BTOE(buf) ((struct udf_eccline *) ((buf)->b_private))

/* --------------------------------------------------------------------- */

/*
 * Upper bound on the number of sectors in one ECC line.  The per-line
 * state bitmaps in struct udf_eccline are uint64_t, one bit per sector.
 */
#define UDF_MAX_PACKET_SIZE	64	/* DONT change this */

/* scheduler states; also used as index into the per-state queues */
#define UDF_SHED_WAITING	1	/* waiting on timeout */
#define UDF_SHED_READING	2
#define UDF_SHED_WRITING	3
#define UDF_SHED_SEQWRITING	4
#define UDF_SHED_IDLE		5	/* resting */
#define UDF_SHED_FREE		6	/* recycleable */
/*
 * Number of queue slots (slot 0 means "not queued").  Parenthesised so
 * the macro stays one operand wherever it is expanded.
 */
#define UDF_SHED_MAX		(UDF_SHED_FREE + 1)

/* flags */
#define ECC_LOCKED		0x01	/* prevent access */
#define ECC_WANTED		0x02	/* trying access */
#define ECC_SEQWRITING		0x04	/* sequential queue */
#define ECC_FLOATING		0x08	/* not queued yet */

/* seconds added to an eccline's wait_time when it is put back */
#define ECC_WAITTIME		10
94
95
96 TAILQ_HEAD(ecclineq, udf_eccline);
97 struct udf_eccline {
98 struct udf_mount *ump;
99 uint64_t present; /* preserve these */
100 uint64_t readin; /* bitmap */
101 uint64_t dirty; /* bitmap */
102 uint64_t error; /* bitmap */
103 uint32_t refcnt;
104
105 struct timespec wait_time;
106 uint32_t flags;
107 uint32_t start_sector; /* physical */
108
109 struct buf *buf;
110 void *blob;
111
112 struct buf *bufs[UDF_MAX_PACKET_SIZE];
113 uint32_t bufs_bpos[UDF_MAX_PACKET_SIZE];
114 int bufs_len[UDF_MAX_PACKET_SIZE];
115
116 int queued_on; /* on which BUFQ list */
117 LIST_ENTRY(udf_eccline) hashchain; /* on sector lookup */
118 };
119
120
121 struct strat_private {
122 lwp_t *queue_lwp;
123 kcondvar_t discstrat_cv; /* to wait on */
124 kmutex_t discstrat_mutex; /* disc strategy */
125 kmutex_t seqwrite_mutex; /* protect mappings */
126
127 int thread_running; /* thread control */
128 int run_thread; /* thread control */
129 int thread_finished; /* thread control */
130 int cur_queue;
131
132 int num_floating;
133 int num_queued[UDF_SHED_MAX];
134 struct bufq_state *queues[UDF_SHED_MAX];
135 struct timespec last_queued[UDF_SHED_MAX];
136 struct disk_strategy old_strategy_setting;
137
138 struct pool eccline_pool;
139 struct pool ecclineblob_pool;
140 LIST_HEAD(, udf_eccline) eccline_hash[UDF_ECCBUF_HASHSIZE];
141 };
142
143 /* --------------------------------------------------------------------- */
144
145 #define UDF_LOCK_ECCLINE(eccline) udf_lock_eccline(eccline)
146 #define UDF_UNLOCK_ECCLINE(eccline) udf_unlock_eccline(eccline)
147
148 /* can be called with or without discstrat lock */
149 static void
150 udf_lock_eccline(struct udf_eccline *eccline)
151 {
152 struct strat_private *priv = PRIV(eccline->ump);
153 int waslocked, ret;
154
155 waslocked = mutex_owned(&priv->discstrat_mutex);
156 if (!waslocked)
157 mutex_enter(&priv->discstrat_mutex);
158
159 /* wait until its unlocked first */
160 while (eccline->flags & ECC_LOCKED) {
161 eccline->flags |= ECC_WANTED;
162 ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex,
163 hz/8);
164 if (ret == EWOULDBLOCK)
165 DPRINTF(LOCKING, ("eccline lock helt, waiting for "
166 "release"));
167 }
168 eccline->flags |= ECC_LOCKED;
169 eccline->flags &= ~ECC_WANTED;
170
171 if (!waslocked)
172 mutex_exit(&priv->discstrat_mutex);
173 }
174
175
176 /* can be called with or without discstrat lock */
177 static void
178 udf_unlock_eccline(struct udf_eccline *eccline)
179 {
180 struct strat_private *priv = PRIV(eccline->ump);
181 int waslocked;
182
183 waslocked = mutex_owned(&priv->discstrat_mutex);
184 if (!waslocked)
185 mutex_enter(&priv->discstrat_mutex);
186
187 eccline->flags &= ~ECC_LOCKED;
188 cv_broadcast(&priv->discstrat_cv);
189
190 if (!waslocked)
191 mutex_exit(&priv->discstrat_mutex);
192 }
193
194
195 /* NOTE discstrat_mutex should be held! */
196 static void
197 udf_dispose_eccline(struct udf_eccline *eccline)
198 {
199 struct strat_private *priv = PRIV(eccline->ump);
200 struct buf *ret;
201
202 KASSERT(mutex_owned(&priv->discstrat_mutex));
203
204 KASSERT(eccline->refcnt == 0);
205 KASSERT(eccline->dirty == 0);
206
207 DPRINTF(ECCLINE, ("dispose eccline with start sector %d, "
208 "present %0"PRIx64"\n", eccline->start_sector,
209 eccline->present));
210
211 if (eccline->queued_on) {
212 ret = BUFQ_CANCEL(priv->queues[eccline->queued_on], eccline->buf);
213 KASSERT(ret == eccline->buf);
214 priv->num_queued[eccline->queued_on]--;
215 }
216 LIST_REMOVE(eccline, hashchain);
217
218 if (eccline->flags & ECC_FLOATING) {
219 eccline->flags &= ~ECC_FLOATING;
220 priv->num_floating--;
221 }
222
223 putiobuf(eccline->buf);
224 pool_put(&priv->ecclineblob_pool, eccline->blob);
225 pool_put(&priv->eccline_pool, eccline);
226 }
227
228
229 /* NOTE discstrat_mutex should be held! */
230 static void
231 udf_push_eccline(struct udf_eccline *eccline, int newqueue)
232 {
233 struct strat_private *priv = PRIV(eccline->ump);
234 struct buf *ret;
235 int curqueue;
236
237 KASSERT(mutex_owned(&priv->discstrat_mutex));
238
239 DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue));
240
241 /* requeue */
242 curqueue = eccline->queued_on;
243 if (curqueue) {
244 ret = BUFQ_CANCEL(priv->queues[curqueue], eccline->buf);
245
246 DPRINTF(PARANOIA, ("push_eccline BUFQ_CANCEL returned %p when "
247 "requested to remove %p from queue %d\n", ret,
248 eccline->buf, curqueue));
249 #ifdef DIAGNOSTIC
250 if (ret == NULL) {
251 int i;
252
253 printf("udf_push_eccline: bufq_cancel can't find "
254 "buffer; dumping queues\n");
255 for (i = 1; i < UDF_SHED_MAX; i++) {
256 printf("queue %d\n\t", i);
257 ret = BUFQ_GET(priv->queues[i]);
258 while (ret) {
259 printf("%p ", ret);
260 if (ret == eccline->buf)
261 printf("[<-] ");
262 ret = BUFQ_GET(priv->queues[i]);
263 }
264 printf("\n");
265 }
266 panic("fatal queue bug; exit");
267 }
268 #endif
269
270 KASSERT(ret == eccline->buf);
271 priv->num_queued[curqueue]--;
272 }
273
274 /* set buffer block numbers to make sure its queued correctly */
275 eccline->buf->b_lblkno = eccline->start_sector;
276 eccline->buf->b_blkno = eccline->start_sector;
277 eccline->buf->b_rawblkno = eccline->start_sector;
278
279 BUFQ_PUT(priv->queues[newqueue], eccline->buf);
280 eccline->queued_on = newqueue;
281 priv->num_queued[newqueue]++;
282 vfs_timestamp(&priv->last_queued[newqueue]);
283
284 if (eccline->flags & ECC_FLOATING) {
285 eccline->flags &= ~ECC_FLOATING;
286 priv->num_floating--;
287 }
288
289 /* tickle disc strategy statemachine */
290 if (newqueue != UDF_SHED_IDLE)
291 cv_signal(&priv->discstrat_cv);
292 }
293
294
295 static struct udf_eccline *
296 udf_pop_eccline(struct strat_private *priv, int queued_on)
297 {
298 struct udf_eccline *eccline;
299 struct buf *buf;
300
301 KASSERT(mutex_owned(&priv->discstrat_mutex));
302
303 buf = BUFQ_GET(priv->queues[queued_on]);
304 if (!buf) {
305 KASSERT(priv->num_queued[queued_on] == 0);
306 return NULL;
307 }
308
309 eccline = BTOE(buf);
310 KASSERT(eccline->queued_on == queued_on);
311 eccline->queued_on = 0;
312 priv->num_queued[queued_on]--;
313
314 if (eccline->flags & ECC_FLOATING)
315 panic("popping already marked floating eccline");
316 eccline->flags |= ECC_FLOATING;
317 priv->num_floating++;
318
319 DPRINTF(PARANOIA, ("DEBUG: buf %p popped from queue %d\n",
320 eccline->buf, queued_on));
321
322 return eccline;
323 }
324
325
326 static struct udf_eccline *
327 udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags)
328 {
329 struct strat_private *priv = PRIV(ump);
330 struct udf_eccline *eccline;
331 uint32_t start_sector, lb_size, blobsize;
332 uint8_t *eccline_blob;
333 int line, line_offset;
334 int num_busy, ret;
335
336 line_offset = sector % ump->packet_size;
337 start_sector = sector - line_offset;
338 line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK;
339
340 mutex_enter(&priv->discstrat_mutex);
341 KASSERT(priv->thread_running);
342
343 retry:
344 DPRINTF(ECCLINE, ("get line sector %d, line %d\n", sector, line));
345 LIST_FOREACH(eccline, &priv->eccline_hash[line], hashchain) {
346 if (eccline->start_sector == start_sector) {
347 DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n",
348 eccline->start_sector));
349
350 UDF_LOCK_ECCLINE(eccline);
351 /* move from freelist (!) */
352 if (eccline->queued_on == UDF_SHED_FREE) {
353 DPRINTF(ECCLINE, ("was on freelist\n"));
354 KASSERT(eccline->refcnt == 0);
355 udf_push_eccline(eccline, UDF_SHED_IDLE);
356 }
357 eccline->refcnt++;
358 mutex_exit(&priv->discstrat_mutex);
359 return eccline;
360 }
361 }
362
363 DPRINTF(ECCLINE, ("\tnot found in eccline cache\n"));
364 /* not found in eccline cache */
365
366 lb_size = udf_rw32(ump->logical_vol->lb_size);
367 blobsize = ump->packet_size * lb_size;
368
369 /* dont allow too many pending requests */
370 DPRINTF(ECCLINE, ("\tallocating new eccline\n"));
371 num_busy = (priv->num_queued[UDF_SHED_SEQWRITING] + priv->num_floating);
372 if ((flags & ECC_SEQWRITING) && (num_busy > UDF_ECCLINE_MAXBUSY)) {
373 ret = cv_timedwait(&priv->discstrat_cv,
374 &priv->discstrat_mutex, hz/8);
375 goto retry;
376 }
377
378 eccline_blob = pool_get(&priv->ecclineblob_pool, PR_NOWAIT);
379 eccline = pool_get(&priv->eccline_pool, PR_NOWAIT);
380 if ((eccline_blob == NULL) || (eccline == NULL)) {
381 if (eccline_blob)
382 pool_put(&priv->ecclineblob_pool, eccline_blob);
383 if (eccline)
384 pool_put(&priv->eccline_pool, eccline);
385
386 /* out of memory for now; canibalise freelist */
387 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
388 if (eccline == NULL) {
389 /* serious trouble; wait and retry */
390 cv_timedwait(&priv->discstrat_cv,
391 &priv->discstrat_mutex, hz/8);
392 goto retry;
393 }
394 /* push back line if we're waiting for it */
395 if (eccline->flags & ECC_WANTED) {
396 udf_push_eccline(eccline, UDF_SHED_IDLE);
397 goto retry;
398 }
399
400 /* unlink this entry */
401 LIST_REMOVE(eccline, hashchain);
402
403 KASSERT(eccline->flags & ECC_FLOATING);
404
405 eccline_blob = eccline->blob;
406 memset(eccline, 0, sizeof(struct udf_eccline));
407 eccline->flags = ECC_FLOATING;
408 } else {
409 memset(eccline, 0, sizeof(struct udf_eccline));
410 eccline->flags = ECC_FLOATING;
411 priv->num_floating++;
412 }
413
414 eccline->queued_on = 0;
415 eccline->blob = eccline_blob;
416 eccline->buf = getiobuf(NULL, true);
417 eccline->buf->b_private = eccline; /* IMPORTANT */
418
419 /* initialise eccline blob */
420 memset(eccline->blob, 0, blobsize);
421
422 eccline->ump = ump;
423 eccline->present = eccline->readin = eccline->dirty = 0;
424 eccline->error = 0;
425 eccline->refcnt = 0;
426
427 eccline->start_sector = start_sector;
428 eccline->buf->b_lblkno = start_sector;
429 eccline->buf->b_blkno = start_sector;
430 eccline->buf->b_rawblkno = start_sector;
431
432 LIST_INSERT_HEAD(&priv->eccline_hash[line], eccline, hashchain);
433
434 /*
435 * TODO possible optimalisation for checking overlap with partitions
436 * to get a clue on future eccline usage
437 */
438 eccline->refcnt++;
439 UDF_LOCK_ECCLINE(eccline);
440
441 mutex_exit(&priv->discstrat_mutex);
442
443 return eccline;
444 }
445
446
447 static void
448 udf_puteccline(struct udf_eccline *eccline)
449 {
450 struct strat_private *priv = PRIV(eccline->ump);
451 struct udf_mount *ump = eccline->ump;
452 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
453
454 mutex_enter(&priv->discstrat_mutex);
455
456 /* clear directly all readin requests from present ones */
457 if (eccline->readin & eccline->present) {
458 /* clear all read bits that are already read in */
459 eccline->readin &= (~eccline->present) & allbits;
460 wakeup(eccline);
461 }
462
463 DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n",
464 eccline->start_sector, eccline->refcnt));
465
466 /* if we have active nodes we dont set it on seqwriting */
467 if (eccline->refcnt > 1)
468 eccline->flags &= ~ECC_SEQWRITING;
469
470 vfs_timestamp(&eccline->wait_time);
471 eccline->wait_time.tv_sec += ECC_WAITTIME;
472 udf_push_eccline(eccline, UDF_SHED_WAITING);
473
474 KASSERT(eccline->refcnt >= 1);
475 eccline->refcnt--;
476 UDF_UNLOCK_ECCLINE(eccline);
477
478 wakeup(eccline);
479 mutex_exit(&priv->discstrat_mutex);
480 }
481
482 /* --------------------------------------------------------------------- */
483
484 static int
485 udf_create_nodedscr_rmw(struct udf_strat_args *args)
486 {
487 union dscrptr **dscrptr = &args->dscr;
488 struct udf_mount *ump = args->ump;
489 struct long_ad *icb = args->icb;
490 struct udf_eccline *eccline;
491 uint64_t bit;
492 uint32_t sectornr, lb_size, dummy;
493 uint8_t *mem;
494 int error, eccsect;
495
496 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
497 if (error)
498 return error;
499
500 lb_size = udf_rw32(ump->logical_vol->lb_size);
501
502 /* get our eccline */
503 eccline = udf_geteccline(ump, sectornr, 0);
504 eccsect = sectornr - eccline->start_sector;
505
506 bit = (uint64_t) 1 << eccsect;
507 eccline->readin &= ~bit; /* just in case */
508 eccline->present |= bit;
509 eccline->dirty &= ~bit; /* Err... euhm... clean? */
510
511 eccline->refcnt++;
512
513 /* clear space */
514 mem = ((uint8_t *) eccline->blob) + eccsect * lb_size;
515 memset(mem, 0, lb_size);
516
517 udf_puteccline(eccline);
518
519 *dscrptr = (union dscrptr *) mem;
520 return 0;
521 }
522
523
524 static void
525 udf_free_nodedscr_rmw(struct udf_strat_args *args)
526 {
527 struct udf_mount *ump = args->ump;
528 struct long_ad *icb = args->icb;
529 struct udf_eccline *eccline;
530 uint64_t bit;
531 uint32_t sectornr, dummy;
532 int error, eccsect;
533
534 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
535 if (error)
536 return;
537
538 /* get our eccline */
539 eccline = udf_geteccline(ump, sectornr, 0);
540 eccsect = sectornr - eccline->start_sector;
541
542 bit = (uint64_t) 1 << eccsect;
543 eccline->readin &= ~bit; /* just in case */
544
545 KASSERT(eccline->refcnt >= 1);
546 eccline->refcnt--;
547
548 udf_puteccline(eccline);
549 }
550
551
552 static int
553 udf_read_nodedscr_rmw(struct udf_strat_args *args)
554 {
555 union dscrptr **dscrptr = &args->dscr;
556 struct udf_mount *ump = args->ump;
557 struct long_ad *icb = args->icb;
558 struct udf_eccline *eccline;
559 uint64_t bit;
560 uint32_t sectornr, dummy;
561 uint8_t *pos;
562 int sector_size = ump->discinfo.sector_size;
563 int lb_size = udf_rw32(ump->logical_vol->lb_size);
564 int i, error, dscrlen, eccsect;
565
566 lb_size = lb_size;
567 KASSERT(sector_size == lb_size);
568 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
569 if (error)
570 return error;
571
572 /* get our eccline */
573 eccline = udf_geteccline(ump, sectornr, 0);
574 eccsect = sectornr - eccline->start_sector;
575
576 bit = (uint64_t) 1 << eccsect;
577 if ((eccline->present & bit) == 0) {
578 /* mark bit for readin */
579 eccline->readin |= bit;
580 eccline->refcnt++; /* prevent recycling */
581 KASSERT(eccline->bufs[eccsect] == NULL);
582 udf_puteccline(eccline);
583
584 /* wait for completion; XXX remodel to lock bit code */
585 error = 0;
586 while ((eccline->present & bit) == 0) {
587 tsleep(eccline, PRIBIO+1, "udflvdrd", hz/8);
588 if (eccline->error & bit) {
589 KASSERT(eccline->refcnt >= 1);
590 eccline->refcnt--; /* undo temp refcnt */
591 *dscrptr = NULL;
592 return EIO; /* XXX error code */
593 }
594 }
595
596 /* reget our line */
597 eccline = udf_geteccline(ump, sectornr, 0);
598 KASSERT(eccline->refcnt >= 1);
599 eccline->refcnt--; /* undo refcnt */
600 }
601
602 *dscrptr = (union dscrptr *)
603 (((uint8_t *) eccline->blob) + eccsect * sector_size);
604
605 /* code from read_phys_descr */
606 /* check if its a valid tag */
607 error = udf_check_tag(*dscrptr);
608 if (error) {
609 /* check if its an empty block */
610 pos = (uint8_t *) *dscrptr;
611 for (i = 0; i < sector_size; i++, pos++) {
612 if (*pos) break;
613 }
614 if (i == sector_size) {
615 /* return no error but with no dscrptr */
616 error = 0;
617 }
618 *dscrptr = NULL;
619 udf_puteccline(eccline);
620 return error;
621 }
622
623 /* calculate descriptor size */
624 dscrlen = udf_tagsize(*dscrptr, sector_size);
625 error = udf_check_tag_payload(*dscrptr, dscrlen);
626 if (error) {
627 *dscrptr = NULL;
628 udf_puteccline(eccline);
629 return error;
630 }
631
632 eccline->refcnt++;
633 udf_puteccline(eccline);
634
635 return 0;
636 }
637
638
639 static int
640 udf_write_nodedscr_rmw(struct udf_strat_args *args)
641 {
642 union dscrptr *dscrptr = args->dscr;
643 struct udf_mount *ump = args->ump;
644 struct long_ad *icb = args->icb;
645 struct udf_node *udf_node = args->udf_node;
646 struct udf_eccline *eccline;
647 uint64_t bit;
648 uint32_t sectornr, logsectornr, dummy;
649 // int waitfor = args->waitfor;
650 int sector_size = ump->discinfo.sector_size;
651 int lb_size = udf_rw32(ump->logical_vol->lb_size);
652 int error, eccsect;
653
654 lb_size = lb_size;
655 KASSERT(sector_size == lb_size);
656 sectornr = 0;
657 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
658 if (error)
659 return error;
660
661 /* add reference to the vnode to prevent recycling */
662 vhold(udf_node->vnode);
663
664 /* get our eccline */
665 eccline = udf_geteccline(ump, sectornr, 0);
666 eccsect = sectornr - eccline->start_sector;
667
668 bit = (uint64_t) 1 << eccsect;
669
670 /* old callback still pending? */
671 if (eccline->bufs[eccsect]) {
672 DPRINTF(WRITE, ("udf_write_nodedscr_rmw: writing descriptor"
673 " over buffer?\n"));
674 nestiobuf_done(eccline->bufs[eccsect],
675 eccline->bufs_len[eccsect],
676 0);
677 eccline->bufs[eccsect] = NULL;
678 }
679
680 /* set sector number in the descriptor and validate */
681 dscrptr = (union dscrptr *)
682 (((uint8_t *) eccline->blob) + eccsect * sector_size);
683 KASSERT(dscrptr == args->dscr);
684
685 logsectornr = udf_rw32(icb->loc.lb_num);
686 dscrptr->tag.tag_loc = udf_rw32(logsectornr);
687 udf_validate_tag_and_crc_sums(dscrptr);
688
689 udf_fixup_node_internals(ump, (uint8_t *) dscrptr, UDF_C_NODE);
690
691 /* set our flags */
692 KASSERT(eccline->present & bit);
693 eccline->dirty |= bit;
694
695 KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size);
696
697 udf_puteccline(eccline);
698
699 holdrele(udf_node->vnode);
700 udf_node->outstanding_nodedscr--;
701 if (udf_node->outstanding_nodedscr == 0) {
702 UDF_UNLOCK_NODE(udf_node, udf_node->i_flags & IN_CALLBACK_ULK);
703 wakeup(&udf_node->outstanding_nodedscr);
704 }
705
706 /* XXX waitfor not used */
707 return 0;
708 }
709
710
711 static void
712 udf_queuebuf_rmw(struct udf_strat_args *args)
713 {
714 struct udf_mount *ump = args->ump;
715 struct buf *buf = args->nestbuf;
716 struct desc_tag *tag;
717 struct strat_private *priv = PRIV(ump);
718 struct udf_eccline *eccline;
719 struct long_ad *node_ad_cpy;
720 uint64_t bit, *lmapping, *pmapping, *lmappos, *pmappos, blknr;
721 uint32_t buf_len, len, sectors, sectornr, our_sectornr;
722 uint32_t bpos;
723 uint16_t vpart_num;
724 uint8_t *fidblk, *src, *dst;
725 int sector_size = ump->discinfo.sector_size;
726 int blks = sector_size / DEV_BSIZE;
727 int eccsect, what, queue, error;
728
729 KASSERT(ump);
730 KASSERT(buf);
731 KASSERT(buf->b_iodone == nestiobuf_iodone);
732
733 blknr = buf->b_blkno;
734 our_sectornr = blknr / blks;
735
736 what = buf->b_udf_c_type;
737 queue = UDF_SHED_READING;
738 if ((buf->b_flags & B_READ) == 0) {
739 /* writing */
740 queue = UDF_SHED_SEQWRITING;
741 if (what == UDF_C_DSCR)
742 queue = UDF_SHED_WRITING;
743 if (what == UDF_C_NODE)
744 queue = UDF_SHED_WRITING;
745 }
746
747 if (queue == UDF_SHED_READING) {
748 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw READ %p : sector %d type %d,"
749 "b_resid %d, b_bcount %d, b_bufsize %d\n",
750 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
751 buf->b_resid, buf->b_bcount, buf->b_bufsize));
752
753 /* mark bits for reading */
754 buf_len = buf->b_bcount;
755 sectornr = our_sectornr;
756 eccline = udf_geteccline(ump, sectornr, 0);
757 eccsect = sectornr - eccline->start_sector;
758 bpos = 0;
759 while (buf_len) {
760 len = MIN(buf_len, sector_size);
761 if (eccsect == ump->packet_size) {
762 udf_puteccline(eccline);
763 eccline = udf_geteccline(ump, sectornr, 0);
764 eccsect = sectornr - eccline->start_sector;
765 }
766 bit = (uint64_t) 1 << eccsect;
767 error = eccline->error & bit ? EIO : 0;
768 if (eccline->present & bit) {
769 src = (uint8_t *) eccline->blob +
770 eccsect * sector_size;
771 dst = (uint8_t *) buf->b_data + bpos;
772 if (!error)
773 memcpy(dst, src, len);
774 nestiobuf_done(buf, len, error);
775 } else {
776 eccline->readin |= bit;
777 KASSERT(eccline->bufs[eccsect] == NULL);
778 eccline->bufs[eccsect] = buf;
779 eccline->bufs_bpos[eccsect] = bpos;
780 eccline->bufs_len[eccsect] = len;
781 }
782 bpos += sector_size;
783 eccsect++;
784 sectornr++;
785 buf_len -= len;
786 }
787 udf_puteccline(eccline);
788 return;
789 }
790
791 if (queue == UDF_SHED_WRITING) {
792 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw WRITE %p : sector %d "
793 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
794 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
795 buf->b_resid, buf->b_bcount, buf->b_bufsize));
796 /* if we have FIDs fixup using buffer's sector number(s) */
797 if (buf->b_udf_c_type == UDF_C_FIDS) {
798 panic("UDF_C_FIDS in SHED_WRITING!\n");
799 #if 0
800 buf_len = buf->b_bcount;
801 sectornr = our_sectornr;
802 bpos = 0;
803 while (buf_len) {
804 len = MIN(buf_len, sector_size);
805 fidblk = (uint8_t *) buf->b_data + bpos;
806 udf_fixup_fid_block(fidblk, sector_size,
807 0, len, sectornr);
808 sectornr++;
809 bpos += len;
810 buf_len -= len;
811 }
812 #endif
813 }
814 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
815
816 /* copy parts into the bufs and set for writing */
817 buf_len = buf->b_bcount;
818 sectornr = our_sectornr;
819 eccline = udf_geteccline(ump, sectornr, 0);
820 eccsect = sectornr - eccline->start_sector;
821 bpos = 0;
822 while (buf_len) {
823 len = MIN(buf_len, sector_size);
824 if (eccsect == ump->packet_size) {
825 udf_puteccline(eccline);
826 eccline = udf_geteccline(ump, sectornr, 0);
827 eccsect = sectornr - eccline->start_sector;
828 }
829 bit = (uint64_t) 1 << eccsect;
830 KASSERT((eccline->readin & bit) == 0);
831 eccline->present |= bit;
832 eccline->dirty |= bit;
833 if (eccline->bufs[eccsect]) {
834 /* old callback still pending */
835 nestiobuf_done(eccline->bufs[eccsect],
836 eccline->bufs_len[eccsect],
837 0);
838 eccline->bufs[eccsect] = NULL;
839 }
840
841 src = (uint8_t *) buf->b_data + bpos;
842 dst = (uint8_t *) eccline->blob + eccsect * sector_size;
843 if (len != sector_size)
844 memset(dst, 0, sector_size);
845 memcpy(dst, src, len);
846
847 /* note that its finished for this extent */
848 eccline->bufs[eccsect] = NULL;
849 nestiobuf_done(buf, len, 0);
850
851 bpos += sector_size;
852 eccsect++;
853 sectornr++;
854 buf_len -= len;
855 }
856 udf_puteccline(eccline);
857 return;
858
859 }
860
861 /* sequential writing */
862 KASSERT(queue == UDF_SHED_SEQWRITING);
863 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw SEQWRITE %p : sector XXXX "
864 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
865 buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
866 buf->b_bufsize));
867 /*
868 * Buffers should not have been allocated to disc addresses yet on
869 * this queue. Note that a buffer can get multiple extents allocated.
870 * Note that it *looks* like the normal writing but its different in
871 * the details.
872 *
873 * lmapping contains lb_num relative to base partition.
874 *
875 * XXX should we try to claim/organize the allocated memory to
876 * block-aligned pieces?
877 */
878 mutex_enter(&priv->seqwrite_mutex);
879
880 lmapping = ump->la_lmapping;
881 node_ad_cpy = ump->la_node_ad_cpy;
882
883 /* logically allocate buf and map it in the file */
884 udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
885
886 /* if we have FIDs, fixup using the new allocation table */
887 if (buf->b_udf_c_type == UDF_C_FIDS) {
888 buf_len = buf->b_bcount;
889 bpos = 0;
890 lmappos = lmapping;
891 while (buf_len) {
892 sectornr = *lmappos++;
893 len = MIN(buf_len, sector_size);
894 fidblk = (uint8_t *) buf->b_data + bpos;
895 udf_fixup_fid_block(fidblk, sector_size,
896 0, len, sectornr);
897 bpos += len;
898 buf_len -= len;
899 }
900 }
901 if (buf->b_udf_c_type == UDF_C_METADATA_SBM) {
902 if (buf->b_lblkno == 0) {
903 /* update the tag location inside */
904 tag = (struct desc_tag *) buf->b_data;
905 tag->tag_loc = udf_rw32(*lmapping);
906 udf_validate_tag_and_crc_sums(buf->b_data);
907 }
908 }
909 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
910
911 /*
912 * Translate new mappings in lmapping to pmappings.
913 * pmapping to contain lb_nums as used for disc adressing.
914 */
915 pmapping = ump->la_pmapping;
916 sectors = (buf->b_bcount + sector_size -1) / sector_size;
917 udf_translate_vtop_list(ump, sectors, vpart_num, lmapping, pmapping);
918
919 /* copy parts into the bufs and set for writing */
920 pmappos = pmapping;
921 buf_len = buf->b_bcount;
922 sectornr = *pmappos++;
923 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
924 eccsect = sectornr - eccline->start_sector;
925 bpos = 0;
926 while (buf_len) {
927 len = MIN(buf_len, sector_size);
928 eccsect = sectornr - eccline->start_sector;
929 if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
930 eccline->flags |= ECC_SEQWRITING;
931 udf_puteccline(eccline);
932 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
933 eccsect = sectornr - eccline->start_sector;
934 }
935 bit = (uint64_t) 1 << eccsect;
936 KASSERT((eccline->readin & bit) == 0);
937 eccline->present |= bit;
938 eccline->dirty |= bit;
939 eccline->bufs[eccsect] = NULL;
940
941 src = (uint8_t *) buf->b_data + bpos;
942 dst = (uint8_t *)
943 eccline->blob + eccsect * sector_size;
944 if (len != sector_size)
945 memset(dst, 0, sector_size);
946 memcpy(dst, src, len);
947
948 /* note that its finished for this extent */
949 nestiobuf_done(buf, len, 0);
950
951 bpos += sector_size;
952 sectornr = *pmappos++;
953 buf_len -= len;
954 }
955 eccline->flags |= ECC_SEQWRITING;
956 udf_puteccline(eccline);
957 mutex_exit(&priv->seqwrite_mutex);
958 }
959
960 /* --------------------------------------------------------------------- */
961
962 static void
963 udf_shedule_read_callback(struct buf *buf)
964 {
965 struct udf_eccline *eccline = BTOE(buf);
966 struct udf_mount *ump = eccline->ump;
967 uint64_t bit;
968 uint8_t *src, *dst;
969 int sector_size = ump->discinfo.sector_size;
970 int error, i, len;
971
972 DPRINTF(ECCLINE, ("read callback called\n"));
973 /* post process read action */
974 error = buf->b_error;
975 for (i = 0; i < ump->packet_size; i++) {
976 bit = (uint64_t) 1 << i;
977 src = (uint8_t *) buf->b_data + i * sector_size;
978 dst = (uint8_t *) eccline->blob + i * sector_size;
979 if (eccline->present & bit)
980 continue;
981 eccline->present |= bit;
982 if (error)
983 eccline->error |= bit;
984 if (eccline->bufs[i]) {
985 dst = (uint8_t *) eccline->bufs[i]->b_data +
986 eccline->bufs_bpos[i];
987 len = eccline->bufs_len[i];
988 if (!error)
989 memcpy(dst, src, len);
990 nestiobuf_done(eccline->bufs[i], len, error);
991 eccline->bufs[i] = NULL;
992 }
993
994 }
995 KASSERT(buf->b_data == eccline->blob);
996 KASSERT(eccline->present == ((uint64_t) 1 << ump->packet_size)-1);
997
998 /*
999 * XXX TODO what to do on read errors? read in all sectors
1000 * synchronously and allocate a sparable entry?
1001 */
1002
1003 udf_puteccline(eccline);
1004 DPRINTF(ECCLINE, ("read callback finished\n"));
1005 }
1006
1007
1008 static void
1009 udf_shedule_write_callback(struct buf *buf)
1010 {
1011 struct udf_eccline *eccline = BTOE(buf);
1012 struct udf_mount *ump = eccline->ump;
1013 uint64_t bit;
1014 int error, i, len;
1015
1016 DPRINTF(ECCLINE, ("write callback called\n"));
1017 /* post process write action */
1018 error = buf->b_error;
1019 for (i = 0; i < ump->packet_size; i++) {
1020 bit = (uint64_t) 1 << i;
1021 if ((eccline->dirty & bit) == 0)
1022 continue;
1023 if (error) {
1024 eccline->error |= bit;
1025 } else {
1026 eccline->dirty &= ~bit;
1027 }
1028 if (eccline->bufs[i]) {
1029 len = eccline->bufs_len[i];
1030 nestiobuf_done(eccline->bufs[i], len, error);
1031 eccline->bufs[i] = NULL;
1032 }
1033 }
1034 KASSERT(eccline->dirty == 0);
1035
1036 KASSERT(error == 0);
1037 /*
1038 * XXX TODO on write errors allocate a sparable entry and reissue
1039 */
1040
1041 udf_puteccline(eccline);
1042 }
1043
1044
1045 static void
1046 udf_issue_eccline(struct udf_eccline *eccline, int queued_on)
1047 {
1048 struct udf_mount *ump = eccline->ump;
1049 struct strat_private *priv = PRIV(ump);
1050 struct buf *buf, *nestbuf;
1051 uint64_t bit, allbits = ((uint64_t) 1 << ump->packet_size)-1;
1052 uint32_t start;
1053 int sector_size = ump->discinfo.sector_size;
1054 int blks = sector_size / DEV_BSIZE;
1055 int i;
1056
1057 if (queued_on == UDF_SHED_READING) {
1058 DPRINTF(SHEDULE, ("udf_issue_eccline reading : "));
1059 /* read all bits that are not yet present */
1060 eccline->readin = (~eccline->present) & allbits;
1061 KASSERT(eccline->readin);
1062 start = eccline->start_sector;
1063 buf = eccline->buf;
1064 buf_init(buf);
1065 buf->b_flags = B_READ | B_ASYNC;
1066 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1067 buf->b_oflags = 0;
1068 buf->b_iodone = udf_shedule_read_callback;
1069 buf->b_data = eccline->blob;
1070 buf->b_bcount = ump->packet_size * sector_size;
1071 buf->b_resid = buf->b_bcount;
1072 buf->b_bufsize = buf->b_bcount;
1073 buf->b_private = eccline;
1074 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1075 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1076 buf->b_proc = NULL;
1077
1078 if (eccline->present != 0) {
1079 for (i = 0; i < ump->packet_size; i++) {
1080 bit = (uint64_t) 1 << i;
1081 if (eccline->present & bit) {
1082 nestiobuf_done(buf, sector_size, 0);
1083 continue;
1084 }
1085 nestbuf = getiobuf(NULL, true);
1086 nestiobuf_setup(buf, nestbuf, i * sector_size,
1087 sector_size);
1088 /* adjust blocknumber to read */
1089 nestbuf->b_blkno = buf->b_blkno + i*blks;
1090 nestbuf->b_rawblkno = buf->b_rawblkno + i*blks;
1091
1092 DPRINTF(SHEDULE, ("sector %d ",
1093 start + i));
1094 /* call asynchronous */
1095 VOP_STRATEGY(ump->devvp, nestbuf);
1096 }
1097 DPRINTF(SHEDULE, ("\n"));
1098 return;
1099 }
1100 } else {
1101 /* write or seqwrite */
1102 DPRINTF(SHEDULE, ("udf_issue_eccline writing or seqwriting : "));
1103 DPRINTF(SHEDULE, ("\n\tpresent %"PRIx64", readin %"PRIx64", "
1104 "dirty %"PRIx64"\n\t", eccline->present, eccline->readin,
1105 eccline->dirty));
1106 if (eccline->present != allbits) {
1107 /* requeue to read-only */
1108 DPRINTF(SHEDULE, ("\n\t-> not complete, requeue to "
1109 "reading\n"));
1110 udf_push_eccline(eccline, UDF_SHED_READING);
1111 return;
1112 }
1113 start = eccline->start_sector;
1114 buf = eccline->buf;
1115 buf_init(buf);
1116 buf->b_flags = B_WRITE | B_ASYNC;
1117 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1118 buf->b_oflags = 0;
1119 buf->b_iodone = udf_shedule_write_callback;
1120 buf->b_data = eccline->blob;
1121 buf->b_bcount = ump->packet_size * sector_size;
1122 buf->b_resid = buf->b_bcount;
1123 buf->b_bufsize = buf->b_bcount;
1124 buf->b_private = eccline;
1125 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1126 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1127 buf->b_proc = NULL;
1128 }
1129
1130 mutex_exit(&priv->discstrat_mutex);
1131 /* call asynchronous */
1132 DPRINTF(SHEDULE, ("sector %d for %d\n",
1133 start, ump->packet_size));
1134 VOP_STRATEGY(ump->devvp, buf);
1135 mutex_enter(&priv->discstrat_mutex);
1136 }
1137
1138
1139 static void
1140 udf_discstrat_thread(void *arg)
1141 {
1142 struct udf_mount *ump = (struct udf_mount *) arg;
1143 struct strat_private *priv = PRIV(ump);
1144 struct udf_eccline *eccline;
1145 struct timespec now, *last;
1146 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
1147 int new_queue, wait, work, num, cnt;
1148
1149 work = 1;
1150 priv->thread_running = 1;
1151 mutex_enter(&priv->discstrat_mutex);
1152 priv->num_floating = 0;
1153 while (priv->run_thread || work || priv->num_floating) {
1154 /* get our time */
1155 vfs_timestamp(&now);
1156
1157 /* maintenance: handle eccline state machine */
1158 num = priv->num_queued[UDF_SHED_WAITING];
1159 cnt = 0;
1160 while (cnt < num) {
1161 eccline = udf_pop_eccline(priv, UDF_SHED_WAITING);
1162 /* requeue */
1163 new_queue = UDF_SHED_FREE;
1164 if (eccline->refcnt > 0)
1165 new_queue = UDF_SHED_IDLE;
1166 if (eccline->flags & ECC_WANTED)
1167 new_queue = UDF_SHED_IDLE;
1168 if (eccline->readin)
1169 new_queue = UDF_SHED_READING;
1170 if (eccline->dirty) {
1171 new_queue = UDF_SHED_WAITING;
1172 if ((eccline->wait_time.tv_sec - now.tv_sec <= 0) ||
1173 ((eccline->present == allbits) &&
1174 (eccline->flags & ECC_SEQWRITING)))
1175 {
1176 new_queue = UDF_SHED_WRITING;
1177 if (eccline->flags & ECC_SEQWRITING)
1178 new_queue = UDF_SHED_SEQWRITING;
1179 if (eccline->present != allbits)
1180 new_queue = UDF_SHED_READING;
1181 }
1182 }
1183 udf_push_eccline(eccline, new_queue);
1184 cnt++;
1185 }
1186
1187 /* maintenance: free exess ecclines */
1188 while (priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE) {
1189 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1190 KASSERT(eccline);
1191 KASSERT(eccline->refcnt == 0);
1192 if (eccline->flags & ECC_WANTED) {
1193 udf_push_eccline(eccline, UDF_SHED_IDLE);
1194 DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n"));
1195 } else {
1196 DPRINTF(ECCLINE, ("Removing entry from free list\n"));
1197 udf_dispose_eccline(eccline);
1198 }
1199 }
1200
1201 /* process the current selected queue */
1202 /* get our time */
1203 vfs_timestamp(&now);
1204 last = &priv->last_queued[priv->cur_queue];
1205
1206 /* get our line */
1207 eccline = udf_pop_eccline(priv, priv->cur_queue);
1208 if (eccline) {
1209 wait = 0;
1210 new_queue = priv->cur_queue;
1211 DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n"));
1212
1213 /* complete the `get' by locking and refcounting it */
1214 UDF_LOCK_ECCLINE(eccline);
1215 eccline->refcnt++;
1216
1217 udf_issue_eccline(eccline, priv->cur_queue);
1218 } else {
1219 /* don't switch too quickly */
1220 if (now.tv_sec - last->tv_sec < 2) {
1221 /* wait some time */
1222 cv_timedwait(&priv->discstrat_cv,
1223 &priv->discstrat_mutex, hz);
1224 /* we assume there is work to be done */
1225 work = 1;
1226 continue;
1227 }
1228
1229 /* XXX select on queue lengths ? */
1230 wait = 1;
1231 /* check if we can/should switch */
1232 new_queue = priv->cur_queue;
1233 if (BUFQ_PEEK(priv->queues[UDF_SHED_READING]))
1234 new_queue = UDF_SHED_READING;
1235 if (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]))
1236 new_queue = UDF_SHED_WRITING;
1237 if (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]))
1238 new_queue = UDF_SHED_SEQWRITING;
1239 }
1240
1241 /* give room */
1242 mutex_exit(&priv->discstrat_mutex);
1243
1244 if (new_queue != priv->cur_queue) {
1245 wait = 0;
1246 DPRINTF(SHEDULE, ("switching from %d to %d\n",
1247 priv->cur_queue, new_queue));
1248 priv->cur_queue = new_queue;
1249 }
1250 mutex_enter(&priv->discstrat_mutex);
1251
1252 /* wait for more if needed */
1253 if (wait)
1254 cv_timedwait(&priv->discstrat_cv,
1255 &priv->discstrat_mutex, hz/4); /* /8 */
1256
1257 work = (BUFQ_PEEK(priv->queues[UDF_SHED_WAITING]) != NULL);
1258 work |= (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL);
1259 work |= (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL);
1260 work |= (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL);
1261
1262 DPRINTF(PARANOIA, ("work : (%d, %d, %d) -> work %d, float %d\n",
1263 (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL),
1264 (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL),
1265 (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL),
1266 work, priv->num_floating));
1267 }
1268
1269 mutex_exit(&priv->discstrat_mutex);
1270
1271 /* tear down remaining ecclines */
1272 mutex_enter(&priv->discstrat_mutex);
1273 KASSERT(priv->num_queued[UDF_SHED_WAITING] == 0);
1274 KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0);
1275 KASSERT(priv->num_queued[UDF_SHED_READING] == 0);
1276 KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0);
1277 KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0);
1278
1279 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_WAITING]) == NULL);
1280 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_IDLE]) == NULL);
1281 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_READING]) == NULL);
1282 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) == NULL);
1283 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
1284 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1285 while (eccline) {
1286 udf_dispose_eccline(eccline);
1287 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1288 }
1289 KASSERT(priv->num_queued[UDF_SHED_FREE] == 0);
1290 mutex_exit(&priv->discstrat_mutex);
1291
1292 priv->thread_running = 0;
1293 priv->thread_finished = 1;
1294 wakeup(&priv->run_thread);
1295 kthread_exit(0);
1296 /* not reached */
1297 }
1298
1299 /* --------------------------------------------------------------------- */
1300
1301 /*
1302 * Buffer memory pool allocator.
1303 */
1304
1305 static void *
1306 ecclinepool_page_alloc(struct pool *pp, int flags)
1307 {
1308 return (void *)uvm_km_alloc(kernel_map,
1309 MAXBSIZE, MAXBSIZE,
1310 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
1311 | UVM_KMF_WIRED /* UVM_KMF_PAGABLE? */);
1312 }
1313
1314 static void
1315 ecclinepool_page_free(struct pool *pp, void *v)
1316 {
1317 uvm_km_free(kernel_map, (vaddr_t)v, MAXBSIZE, UVM_KMF_WIRED);
1318 }
1319
1320 static struct pool_allocator ecclinepool_allocator = {
1321 .pa_alloc = ecclinepool_page_alloc,
1322 .pa_free = ecclinepool_page_free,
1323 .pa_pagesz = MAXBSIZE,
1324 };
1325
1326
1327 static void
1328 udf_discstrat_init_rmw(struct udf_strat_args *args)
1329 {
1330 struct udf_mount *ump = args->ump;
1331 struct strat_private *priv = PRIV(ump);
1332 uint32_t lb_size, blobsize, hashline;
1333 int i;
1334
1335 KASSERT(ump);
1336 KASSERT(ump->logical_vol);
1337 KASSERT(priv == NULL);
1338
1339 lb_size = udf_rw32(ump->logical_vol->lb_size);
1340 blobsize = ump->packet_size * lb_size;
1341 KASSERT(lb_size > 0);
1342 KASSERT(ump->packet_size <= 64);
1343
1344 /* initialise our memory space */
1345 ump->strategy_private = malloc(sizeof(struct strat_private),
1346 M_UDFTEMP, M_WAITOK);
1347 priv = ump->strategy_private;
1348 memset(priv, 0 , sizeof(struct strat_private));
1349
1350 /* initialise locks */
1351 cv_init(&priv->discstrat_cv, "udfstrat");
1352 mutex_init(&priv->discstrat_mutex, MUTEX_DRIVER, IPL_BIO);
1353 mutex_init(&priv->seqwrite_mutex, MUTEX_DEFAULT, IPL_NONE);
1354
1355 /* initialise struct eccline pool */
1356 pool_init(&priv->eccline_pool, sizeof(struct udf_eccline),
1357 0, 0, 0, "udf_eccline_pool", NULL, IPL_NONE);
1358
1359 /* initialise eccline blob pool */
1360 ecclinepool_allocator.pa_pagesz = blobsize;
1361 pool_init(&priv->ecclineblob_pool, blobsize,
1362 0, 0, 0, "udf_eccline_blob", &ecclinepool_allocator, IPL_NONE);
1363
1364 /* initialise main queues */
1365 for (i = 0; i < UDF_SHED_MAX; i++) {
1366 priv->num_queued[i] = 0;
1367 vfs_timestamp(&priv->last_queued[i]);
1368 }
1369 bufq_alloc(&priv->queues[UDF_SHED_WAITING], "fcfs",
1370 BUFQ_SORT_RAWBLOCK);
1371 bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
1372 BUFQ_SORT_RAWBLOCK);
1373 bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
1374 BUFQ_SORT_RAWBLOCK);
1375 bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "disksort", 0);
1376
1377 /* initialise administrative queues */
1378 bufq_alloc(&priv->queues[UDF_SHED_IDLE], "fcfs", 0);
1379 bufq_alloc(&priv->queues[UDF_SHED_FREE], "fcfs", 0);
1380
1381 for (hashline = 0; hashline < UDF_ECCBUF_HASHSIZE; hashline++) {
1382 LIST_INIT(&priv->eccline_hash[hashline]);
1383 }
1384
1385 /* create our disk strategy thread */
1386 priv->cur_queue = UDF_SHED_READING;
1387 priv->thread_finished = 0;
1388 priv->thread_running = 0;
1389 priv->run_thread = 1;
1390 if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
1391 udf_discstrat_thread, ump, &priv->queue_lwp,
1392 "%s", "udf_rw")) {
1393 panic("fork udf_rw");
1394 }
1395
1396 /* wait for thread to spin up */
1397 while (!priv->thread_running) {
1398 tsleep(&priv->thread_running, PRIBIO+1, "udfshedstart", hz);
1399 }
1400 }
1401
1402
1403 static void
1404 udf_discstrat_finish_rmw(struct udf_strat_args *args)
1405 {
1406 struct udf_mount *ump = args->ump;
1407 struct strat_private *priv = PRIV(ump);
1408 int error;
1409
1410 if (ump == NULL)
1411 return;
1412
1413 /* stop our sheduling thread */
1414 KASSERT(priv->run_thread == 1);
1415 priv->run_thread = 0;
1416 wakeup(priv->queue_lwp);
1417 while (!priv->thread_finished) {
1418 error = tsleep(&priv->run_thread, PRIBIO+1,
1419 "udfshedfin", hz);
1420 }
1421 /* kthread should be finished now */
1422
1423 /* cleanup our pools */
1424 pool_destroy(&priv->eccline_pool);
1425 pool_destroy(&priv->ecclineblob_pool);
1426
1427 cv_destroy(&priv->discstrat_cv);
1428 mutex_destroy(&priv->discstrat_mutex);
1429 mutex_destroy(&priv->seqwrite_mutex);
1430
1431 /* free our private space */
1432 free(ump->strategy_private, M_UDFTEMP);
1433 ump->strategy_private = NULL;
1434 }
1435
1436 /* --------------------------------------------------------------------- */
1437
/*
 * Operations table of the read-modify-write strategy.
 *
 * The initializer is positional, so the order of the entries has to match
 * the member order of struct udf_strategy (declared outside this file
 * section -- check the declaration before adding or moving entries).
 */
struct udf_strategy udf_strat_rmw =
{
	udf_create_nodedscr_rmw,
	udf_free_nodedscr_rmw,
	udf_read_nodedscr_rmw,
	udf_write_nodedscr_rmw,
	udf_queuebuf_rmw,
	udf_discstrat_init_rmw,		/* strategy setup */
	udf_discstrat_finish_rmw	/* strategy teardown */
};
1448
1449