udf_strat_rmw.c revision 1.9.4.1 1 /* $NetBSD: udf_strat_rmw.c,v 1.9.4.1 2008/11/02 22:56:06 snj Exp $ */
2
3 /*
4 * Copyright (c) 2006, 2008 Reinoud Zandijk
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 */
28
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.9.4.1 2008/11/02 22:56:06 snj Exp $");
32 #endif /* not lint */
33
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_quota.h"
37 #include "opt_compat_netbsd.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysctl.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <miscfs/genfs/genfs_node.h>
48 #include <sys/mount.h>
49 #include <sys/buf.h>
50 #include <sys/file.h>
51 #include <sys/device.h>
52 #include <sys/disklabel.h>
53 #include <sys/ioctl.h>
54 #include <sys/malloc.h>
55 #include <sys/dirent.h>
56 #include <sys/stat.h>
57 #include <sys/conf.h>
58 #include <sys/kauth.h>
59 #include <sys/kthread.h>
60 #include <dev/clock_subr.h>
61
62 #include <fs/udf/ecma167-udf.h>
63 #include <fs/udf/udf_mount.h>
64
65 #include "udf.h"
66 #include "udf_subr.h"
67 #include "udf_bswap.h"
68
69
/* Shorthand casts for the private pointers stored in kernel objects. */

/* vnode -> the struct udf_node kept in its v_data */
#define VTOI(vp)	((struct udf_node *) (vp)->v_data)
/* udf_mount -> the struct strat_private kept in its strategy_private */
#define PRIV(mnt)	((struct strat_private *) (mnt)->strategy_private)
/* buf -> the struct udf_eccline kept in its b_private */
#define BTOE(bp)	((struct udf_eccline *) ((bp)->b_private))
73
74 /* --------------------------------------------------------------------- */
75
/*
 * Maximum number of sectors in one packet / ECC line; sizes the per-sector
 * arrays in struct udf_eccline.  DONT change this
 */
#define UDF_MAX_PACKET_SIZE	64

/*
 * Scheduler states.  A state doubles as the index of the queue an eccline
 * sits on; a queued_on value of 0 means "not on any queue".
 */
#define UDF_SHED_READING	1
#define UDF_SHED_WRITING	2
#define UDF_SHED_SEQWRITING	3
#define UDF_SHED_IDLE		4	/* resting */
#define UDF_SHED_FREE		5	/* recycleable */
#define UDF_SHED_MAX		6	/* number of slots, including unused 0 */

/* eccline flags */
#define ECC_LOCKED		(1 << 0)	/* prevent access */
#define ECC_WANTED		(1 << 1)	/* trying access */
#define ECC_SEQWRITING		(1 << 2)	/* sequential queue */
#define ECC_FLOATING		(1 << 3)	/* not queued yet */
91
92
93 TAILQ_HEAD(ecclineq, udf_eccline);
94 struct udf_eccline {
95 struct udf_mount *ump;
96 uint64_t present; /* preserve these */
97 uint64_t readin; /* bitmap */
98 uint64_t dirty; /* bitmap */
99 uint64_t error; /* bitmap */
100 uint32_t refcnt;
101
102 uint32_t flags;
103 uint32_t start_sector; /* physical */
104
105 struct buf *buf;
106 void *blob;
107
108 struct buf *bufs[UDF_MAX_PACKET_SIZE];
109 uint32_t bufs_bpos[UDF_MAX_PACKET_SIZE];
110 int bufs_len[UDF_MAX_PACKET_SIZE];
111
112 int queued_on; /* on which BUFQ list */
113 LIST_ENTRY(udf_eccline) hashchain; /* on sector lookup */
114 };
115
116
117 struct strat_private {
118 lwp_t *queue_lwp;
119 kcondvar_t discstrat_cv; /* to wait on */
120 kmutex_t discstrat_mutex; /* disc strategy */
121 kmutex_t seqwrite_mutex; /* protect mappings */
122
123 int run_thread; /* thread control */
124 int thread_finished; /* thread control */
125 int cur_queue;
126
127 int num_floating;
128 int num_queued[UDF_SHED_MAX];
129 struct bufq_state *queues[UDF_SHED_MAX];
130 struct timespec last_queued[UDF_SHED_MAX];
131 struct disk_strategy old_strategy_setting;
132
133 struct pool eccline_pool;
134 struct pool ecclineblob_pool;
135 LIST_HEAD(, udf_eccline) eccline_hash[UDF_ECCBUF_HASHSIZE];
136 };
137
138 /* --------------------------------------------------------------------- */
139
/* macro spellings of the eccline lock/unlock routines */
#define UDF_LOCK_ECCLINE(line)		udf_lock_eccline(line)
#define UDF_UNLOCK_ECCLINE(line)	udf_unlock_eccline(line)
142
143 /* can be called with or without discstrat lock */
144 static void
145 udf_lock_eccline(struct udf_eccline *eccline)
146 {
147 struct strat_private *priv = PRIV(eccline->ump);
148 int waslocked, ret;
149
150 waslocked = mutex_owned(&priv->discstrat_mutex);
151 if (!waslocked)
152 mutex_enter(&priv->discstrat_mutex);
153
154 /* wait until its unlocked first */
155 while (eccline->flags & ECC_LOCKED) {
156 eccline->flags |= ECC_WANTED;
157 ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex,
158 hz/8);
159 if (ret == EWOULDBLOCK)
160 DPRINTF(LOCKING, ("eccline lock helt, waiting for "
161 "release"));
162 }
163 eccline->flags |= ECC_LOCKED;
164 eccline->flags &= ~ECC_WANTED;
165
166 if (!waslocked)
167 mutex_exit(&priv->discstrat_mutex);
168 }
169
170
171 /* can be called with or without discstrat lock */
172 static void
173 udf_unlock_eccline(struct udf_eccline *eccline)
174 {
175 struct strat_private *priv = PRIV(eccline->ump);
176 int waslocked;
177
178 KASSERT(mutex_owned(&priv->discstrat_mutex));
179
180 waslocked = mutex_owned(&priv->discstrat_mutex);
181 if (!waslocked)
182 mutex_enter(&priv->discstrat_mutex);
183
184 eccline->flags &= ~ECC_LOCKED;
185 cv_broadcast(&priv->discstrat_cv);
186
187 if (!waslocked)
188 mutex_exit(&priv->discstrat_mutex);
189 }
190
191
192 /* NOTE discstrat_mutex should be held! */
193 static void
194 udf_dispose_eccline(struct udf_eccline *eccline)
195 {
196 struct strat_private *priv = PRIV(eccline->ump);
197 struct buf *ret;
198
199 KASSERT(mutex_owned(&priv->discstrat_mutex));
200
201 KASSERT(eccline->refcnt == 0);
202 KASSERT(eccline->dirty == 0);
203
204 DPRINTF(ECCLINE, ("dispose eccline with start sector %d, "
205 "present %0"PRIx64"\n", eccline->start_sector,
206 eccline->present));
207
208 if (eccline->queued_on) {
209 ret = BUFQ_CANCEL(priv->queues[eccline->queued_on], eccline->buf);
210 KASSERT(ret == eccline->buf);
211 priv->num_queued[eccline->queued_on]--;
212 }
213 LIST_REMOVE(eccline, hashchain);
214
215 if (eccline->flags & ECC_FLOATING) {
216 eccline->flags &= ~ECC_FLOATING;
217 priv->num_floating--;
218 }
219
220 putiobuf(eccline->buf);
221 pool_put(&priv->ecclineblob_pool, eccline->blob);
222 pool_put(&priv->eccline_pool, eccline);
223 }
224
225
226 /* NOTE discstrat_mutex should be held! */
227 static void
228 udf_push_eccline(struct udf_eccline *eccline, int newqueue)
229 {
230 struct strat_private *priv = PRIV(eccline->ump);
231 struct buf *ret;
232 int curqueue;
233
234 KASSERT(mutex_owned(&priv->discstrat_mutex));
235
236 DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue));
237
238 /* requeue */
239 curqueue = eccline->queued_on;
240 if (curqueue) {
241 ret = BUFQ_CANCEL(priv->queues[curqueue], eccline->buf);
242
243 DPRINTF(PARANOIA, ("push_eccline BUFQ_CANCEL returned %p when "
244 "requested to remove %p from queue %d\n", ret,
245 eccline->buf, curqueue));
246 #ifdef DIAGNOSTIC
247 if (ret == NULL) {
248 int i;
249
250 printf("udf_push_eccline: bufq_cancel can't find "
251 "buffer; dumping queues\n");
252 for (i = 1; i < UDF_SHED_MAX; i++) {
253 printf("queue %d\n\t", i);
254 ret = BUFQ_GET(priv->queues[i]);
255 while (ret) {
256 printf("%p ", ret);
257 if (ret == eccline->buf)
258 printf("[<-] ");
259 ret = BUFQ_GET(priv->queues[i]);
260 }
261 printf("\n");
262 }
263 panic("fatal queue bug; exit");
264 }
265 #endif
266
267 KASSERT(ret == eccline->buf);
268 priv->num_queued[curqueue]--;
269 }
270
271 /* set buffer block numbers to make sure its queued correctly */
272 eccline->buf->b_lblkno = eccline->start_sector;
273 eccline->buf->b_blkno = eccline->start_sector;
274 eccline->buf->b_rawblkno = eccline->start_sector;
275
276 BUFQ_PUT(priv->queues[newqueue], eccline->buf);
277 eccline->queued_on = newqueue;
278 priv->num_queued[newqueue]++;
279 vfs_timestamp(&priv->last_queued[newqueue]);
280
281 if (eccline->flags & ECC_FLOATING) {
282 eccline->flags &= ~ECC_FLOATING;
283 priv->num_floating--;
284 }
285
286 if ((newqueue != UDF_SHED_FREE) && (newqueue != UDF_SHED_IDLE))
287 cv_signal(&priv->discstrat_cv);
288 }
289
290
291 static struct udf_eccline *
292 udf_pop_eccline(struct strat_private *priv, int queued_on)
293 {
294 struct udf_eccline *eccline;
295 struct buf *buf;
296
297 KASSERT(mutex_owned(&priv->discstrat_mutex));
298
299 buf = BUFQ_GET(priv->queues[queued_on]);
300 if (!buf) {
301 KASSERT(priv->num_queued[queued_on] == 0);
302 return NULL;
303 }
304
305 eccline = BTOE(buf);
306 KASSERT(eccline->queued_on == queued_on);
307 eccline->queued_on = 0;
308 priv->num_queued[queued_on]--;
309
310 if (eccline->flags & ECC_FLOATING)
311 panic("popping already marked floating eccline");
312 eccline->flags |= ECC_FLOATING;
313 priv->num_floating++;
314
315 DPRINTF(PARANOIA, ("DEBUG: buf %p popped from queue %d\n",
316 eccline->buf, queued_on));
317
318 return eccline;
319 }
320
321
322 static struct udf_eccline *
323 udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags)
324 {
325 struct strat_private *priv = PRIV(ump);
326 struct udf_eccline *eccline;
327 uint32_t start_sector, lb_size, blobsize;
328 uint8_t *eccline_blob;
329 int line, line_offset;
330 int num_busy, ret;
331
332 line_offset = sector % ump->packet_size;
333 start_sector = sector - line_offset;
334 line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK;
335
336 mutex_enter(&priv->discstrat_mutex);
337
338 retry:
339 DPRINTF(ECCLINE, ("get line sector %d, line %d\n", sector, line));
340 LIST_FOREACH(eccline, &priv->eccline_hash[line], hashchain) {
341 if (eccline->start_sector == start_sector) {
342 DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n",
343 eccline->start_sector));
344
345 UDF_LOCK_ECCLINE(eccline);
346 /* move from freelist (!) */
347 if (eccline->queued_on == UDF_SHED_FREE) {
348 DPRINTF(ECCLINE, ("was on freelist\n"));
349 KASSERT(eccline->refcnt == 0);
350 udf_push_eccline(eccline, UDF_SHED_IDLE);
351 }
352 eccline->refcnt++;
353 mutex_exit(&priv->discstrat_mutex);
354 return eccline;
355 }
356 }
357
358 DPRINTF(ECCLINE, ("\tnot found in eccline cache\n"));
359 /* not found in eccline cache */
360
361 lb_size = udf_rw32(ump->logical_vol->lb_size);
362 blobsize = ump->packet_size * lb_size;
363
364 /* dont allow too many pending requests */
365 DPRINTF(ECCLINE, ("\tallocating new eccline\n"));
366 num_busy = (priv->num_queued[UDF_SHED_SEQWRITING] + priv->num_floating);
367 if ((flags & ECC_SEQWRITING) && (num_busy > UDF_ECCLINE_MAXBUSY)) {
368 ret = cv_timedwait(&priv->discstrat_cv,
369 &priv->discstrat_mutex, hz/8);
370 goto retry;
371 }
372
373 eccline_blob = pool_get(&priv->ecclineblob_pool, PR_NOWAIT);
374 eccline = pool_get(&priv->eccline_pool, PR_NOWAIT);
375 if ((eccline_blob == NULL) || (eccline == NULL)) {
376 if (eccline_blob)
377 pool_put(&priv->ecclineblob_pool, eccline_blob);
378 if (eccline)
379 pool_put(&priv->eccline_pool, eccline);
380
381 /* out of memory for now; canibalise freelist */
382 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
383 if (eccline == NULL) {
384 /* serious trouble; wait and retry */
385 cv_timedwait(&priv->discstrat_cv,
386 &priv->discstrat_mutex, hz/8);
387 goto retry;
388 }
389 /* push back line if we're waiting for it */
390 if (eccline->flags & ECC_WANTED) {
391 udf_push_eccline(eccline, UDF_SHED_IDLE);
392 goto retry;
393 }
394
395 /* unlink this entry */
396 LIST_REMOVE(eccline, hashchain);
397
398 KASSERT(eccline->flags & ECC_FLOATING);
399
400 eccline_blob = eccline->blob;
401 memset(eccline, 0, sizeof(struct udf_eccline));
402 eccline->flags = ECC_FLOATING;
403 } else {
404 memset(eccline, 0, sizeof(struct udf_eccline));
405 eccline->flags = ECC_FLOATING;
406 priv->num_floating++;
407 }
408
409 eccline->queued_on = 0;
410 eccline->blob = eccline_blob;
411 eccline->buf = getiobuf(NULL, true);
412 eccline->buf->b_private = eccline; /* IMPORTANT */
413
414 /* initialise eccline blob */
415 memset(eccline->blob, 0, blobsize);
416
417 eccline->ump = ump;
418 eccline->present = eccline->readin = eccline->dirty = 0;
419 eccline->error = 0;
420 eccline->refcnt = 0;
421
422 eccline->start_sector = start_sector;
423 eccline->buf->b_lblkno = start_sector;
424 eccline->buf->b_blkno = start_sector;
425 eccline->buf->b_rawblkno = start_sector;
426
427 LIST_INSERT_HEAD(&priv->eccline_hash[line], eccline, hashchain);
428
429 /*
430 * TODO possible optimalisation for checking overlap with partitions
431 * to get a clue on future eccline usage
432 */
433 eccline->refcnt++;
434 UDF_LOCK_ECCLINE(eccline);
435
436 mutex_exit(&priv->discstrat_mutex);
437
438 return eccline;
439 }
440
441
442 static void
443 udf_puteccline(struct udf_eccline *eccline)
444 {
445 struct strat_private *priv = PRIV(eccline->ump);
446 struct udf_eccline *deccline;
447 struct udf_mount *ump = eccline->ump;
448 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
449 int newqueue, tries;
450
451 mutex_enter(&priv->discstrat_mutex);
452
453 /* clear directly all readin requests from present ones */
454 if (eccline->readin & eccline->present) {
455 /* clear all read bits that are already read in */
456 eccline->readin &= (~eccline->present) & allbits;
457 wakeup(eccline);
458 }
459
460 DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n",
461 eccline->start_sector, eccline->refcnt));
462
463 /* requeue */
464 newqueue = UDF_SHED_FREE;
465 if (eccline->refcnt > 1)
466 newqueue = UDF_SHED_IDLE;
467 if (eccline->flags & ECC_WANTED)
468 newqueue = UDF_SHED_IDLE;
469 if (eccline->dirty) {
470 newqueue = UDF_SHED_WRITING;
471 if (eccline->flags & ECC_SEQWRITING)
472 newqueue = UDF_SHED_SEQWRITING;
473 }
474
475 /* if we have active nodes */
476 if (eccline->refcnt > 1) {
477 /* we dont set it on seqwriting */
478 eccline->flags &= ~ECC_SEQWRITING;
479 }
480
481 /* if we need reading in or not all is yet present, queue reading */
482 if ((eccline->readin) || (eccline->present != allbits))
483 newqueue = UDF_SHED_READING;
484
485 /* reduce the number of kept free buffers */
486 tries = priv->num_queued[UDF_SHED_FREE] - UDF_ECCLINE_MAXFREE;
487 while (tries > 0 /* priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE */) {
488 deccline = udf_pop_eccline(priv, UDF_SHED_FREE);
489 KASSERT(deccline);
490 KASSERT(deccline->refcnt == 0);
491 if (deccline->flags & ECC_WANTED) {
492 udf_push_eccline(deccline, UDF_SHED_IDLE);
493 DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n"));
494 } else {
495 DPRINTF(ECCLINE, ("Removing entry from free list\n"));
496 udf_dispose_eccline(deccline);
497 }
498 tries--;
499 }
500
501 udf_push_eccline(eccline, newqueue);
502
503 KASSERT(eccline->refcnt >= 1);
504 eccline->refcnt--;
505 UDF_UNLOCK_ECCLINE(eccline);
506
507 mutex_exit(&priv->discstrat_mutex);
508 }
509
510 /* --------------------------------------------------------------------- */
511
512 static int
513 udf_create_nodedscr_rmw(struct udf_strat_args *args)
514 {
515 union dscrptr **dscrptr = &args->dscr;
516 struct udf_mount *ump = args->ump;
517 struct long_ad *icb = args->icb;
518 struct udf_eccline *eccline;
519 uint64_t bit;
520 uint32_t sectornr, lb_size, dummy;
521 uint8_t *mem;
522 int error, eccsect;
523
524 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
525 if (error)
526 return error;
527
528 lb_size = udf_rw32(ump->logical_vol->lb_size);
529
530 /* get our eccline */
531 eccline = udf_geteccline(ump, sectornr, 0);
532 eccsect = sectornr - eccline->start_sector;
533
534 bit = (uint64_t) 1 << eccsect;
535 eccline->readin &= ~bit; /* just in case */
536 eccline->present |= bit;
537 eccline->dirty &= ~bit; /* Err... euhm... clean? */
538
539 eccline->refcnt++;
540
541 /* clear space */
542 mem = ((uint8_t *) eccline->blob) + eccsect * lb_size;
543 memset(mem, 0, lb_size);
544
545 udf_puteccline(eccline);
546
547 *dscrptr = (union dscrptr *) mem;
548 return 0;
549 }
550
551
552 static void
553 udf_free_nodedscr_rmw(struct udf_strat_args *args)
554 {
555 struct udf_mount *ump = args->ump;
556 struct long_ad *icb = args->icb;
557 struct udf_eccline *eccline;
558 uint64_t bit;
559 uint32_t sectornr, dummy;
560 int error, eccsect;
561
562 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
563 if (error)
564 return;
565
566 /* get our eccline */
567 eccline = udf_geteccline(ump, sectornr, 0);
568 eccsect = sectornr - eccline->start_sector;
569
570 bit = (uint64_t) 1 << eccsect;
571 eccline->readin &= ~bit; /* just in case */
572
573 KASSERT(eccline->refcnt >= 1);
574 eccline->refcnt--;
575
576 udf_puteccline(eccline);
577 }
578
579
580 static int
581 udf_read_nodedscr_rmw(struct udf_strat_args *args)
582 {
583 union dscrptr **dscrptr = &args->dscr;
584 struct udf_mount *ump = args->ump;
585 struct long_ad *icb = args->icb;
586 struct udf_eccline *eccline;
587 uint64_t bit;
588 uint32_t sectornr, dummy;
589 uint8_t *pos;
590 int sector_size = ump->discinfo.sector_size;
591 int lb_size = udf_rw32(ump->logical_vol->lb_size);
592 int i, error, dscrlen, eccsect;
593
594 lb_size = lb_size;
595 KASSERT(sector_size == lb_size);
596 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
597 if (error)
598 return error;
599
600 /* get our eccline */
601 eccline = udf_geteccline(ump, sectornr, 0);
602 eccsect = sectornr - eccline->start_sector;
603
604 bit = (uint64_t) 1 << eccsect;
605 if ((eccline->present & bit) == 0) {
606 /* mark bit for readin */
607 eccline->readin |= bit;
608 eccline->refcnt++; /* prevent recycling */
609 KASSERT(eccline->bufs[eccsect] == NULL);
610 udf_puteccline(eccline);
611
612 /* wait for completion; XXX remodel to lock bit code */
613 error = 0;
614 while ((eccline->present & bit) == 0) {
615 tsleep(eccline, PRIBIO+1, "udflvdrd", hz/8);
616 if (eccline->error & bit) {
617 KASSERT(eccline->refcnt >= 1);
618 eccline->refcnt--; /* undo temp refcnt */
619 *dscrptr = NULL;
620 return EIO; /* XXX error code */
621 }
622 }
623
624 /* reget our line */
625 eccline = udf_geteccline(ump, sectornr, 0);
626 KASSERT(eccline->refcnt >= 1);
627 eccline->refcnt--; /* undo refcnt */
628 }
629
630 *dscrptr = (union dscrptr *)
631 (((uint8_t *) eccline->blob) + eccsect * sector_size);
632
633 /* code from read_phys_descr */
634 /* check if its a valid tag */
635 error = udf_check_tag(*dscrptr);
636 if (error) {
637 /* check if its an empty block */
638 pos = (uint8_t *) *dscrptr;
639 for (i = 0; i < sector_size; i++, pos++) {
640 if (*pos) break;
641 }
642 if (i == sector_size) {
643 /* return no error but with no dscrptr */
644 error = 0;
645 }
646 *dscrptr = NULL;
647 udf_puteccline(eccline);
648 return error;
649 }
650
651 /* calculate descriptor size */
652 dscrlen = udf_tagsize(*dscrptr, sector_size);
653 error = udf_check_tag_payload(*dscrptr, dscrlen);
654 if (error) {
655 *dscrptr = NULL;
656 udf_puteccline(eccline);
657 return error;
658 }
659
660 eccline->refcnt++;
661 udf_puteccline(eccline);
662
663 return 0;
664 }
665
666
667 static int
668 udf_write_nodedscr_rmw(struct udf_strat_args *args)
669 {
670 union dscrptr *dscrptr = args->dscr;
671 struct udf_mount *ump = args->ump;
672 struct long_ad *icb = args->icb;
673 struct udf_node *udf_node = args->udf_node;
674 struct udf_eccline *eccline;
675 uint64_t bit;
676 uint32_t sectornr, logsectornr, dummy;
677 // int waitfor = args->waitfor;
678 int sector_size = ump->discinfo.sector_size;
679 int lb_size = udf_rw32(ump->logical_vol->lb_size);
680 int error, eccsect;
681
682 lb_size = lb_size;
683 KASSERT(sector_size == lb_size);
684 sectornr = 0;
685 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
686 if (error)
687 return error;
688
689 /* add reference to the vnode to prevent recycling */
690 vhold(udf_node->vnode);
691
692 /* get our eccline */
693 eccline = udf_geteccline(ump, sectornr, 0);
694 eccsect = sectornr - eccline->start_sector;
695
696 bit = (uint64_t) 1 << eccsect;
697
698 /* old callback still pending? */
699 if (eccline->bufs[eccsect]) {
700 DPRINTF(WRITE, ("udf_write_nodedscr_rmw: writing descriptor"
701 " over buffer?\n"));
702 nestiobuf_done(eccline->bufs[eccsect],
703 eccline->bufs_len[eccsect],
704 0);
705 eccline->bufs[eccsect] = NULL;
706 }
707
708 /* set sector number in the descriptor and validate */
709 dscrptr = (union dscrptr *)
710 (((uint8_t *) eccline->blob) + eccsect * sector_size);
711 KASSERT(dscrptr == args->dscr);
712
713 logsectornr = udf_rw32(icb->loc.lb_num);
714 dscrptr->tag.tag_loc = udf_rw32(logsectornr);
715 udf_validate_tag_and_crc_sums(dscrptr);
716
717 udf_fixup_node_internals(ump, (uint8_t *) dscrptr, UDF_C_NODE);
718
719 /* set our flags */
720 KASSERT(eccline->present & bit);
721 eccline->dirty |= bit;
722
723 KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size);
724
725 udf_puteccline(eccline);
726
727 holdrele(udf_node->vnode);
728 udf_node->outstanding_nodedscr--;
729 if (udf_node->outstanding_nodedscr == 0) {
730 UDF_UNLOCK_NODE(udf_node, udf_node->i_flags & IN_CALLBACK_ULK);
731 wakeup(&udf_node->outstanding_nodedscr);
732 }
733
734 /* XXX waitfor not used */
735 return 0;
736 }
737
738
739 static void
740 udf_queuebuf_rmw(struct udf_strat_args *args)
741 {
742 struct udf_mount *ump = args->ump;
743 struct buf *buf = args->nestbuf;
744 struct desc_tag *tag;
745 struct strat_private *priv = PRIV(ump);
746 struct udf_eccline *eccline;
747 struct long_ad *node_ad_cpy;
748 uint64_t bit, *lmapping, *pmapping, *lmappos, *pmappos, blknr;
749 uint32_t buf_len, len, sectors, sectornr, our_sectornr;
750 uint32_t bpos;
751 uint16_t vpart_num;
752 uint8_t *fidblk, *src, *dst;
753 int sector_size = ump->discinfo.sector_size;
754 int blks = sector_size / DEV_BSIZE;
755 int eccsect, what, queue, error;
756
757 KASSERT(ump);
758 KASSERT(buf);
759 KASSERT(buf->b_iodone == nestiobuf_iodone);
760
761 blknr = buf->b_blkno;
762 our_sectornr = blknr / blks;
763
764 what = buf->b_udf_c_type;
765 queue = UDF_SHED_READING;
766 if ((buf->b_flags & B_READ) == 0) {
767 /* writing */
768 queue = UDF_SHED_SEQWRITING;
769 if (what == UDF_C_DSCR)
770 queue = UDF_SHED_WRITING;
771 if (what == UDF_C_NODE)
772 queue = UDF_SHED_WRITING;
773 }
774
775 if (queue == UDF_SHED_READING) {
776 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw READ %p : sector %d type %d,"
777 "b_resid %d, b_bcount %d, b_bufsize %d\n",
778 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
779 buf->b_resid, buf->b_bcount, buf->b_bufsize));
780
781 /* mark bits for reading */
782 buf_len = buf->b_bcount;
783 sectornr = our_sectornr;
784 eccline = udf_geteccline(ump, sectornr, 0);
785 eccsect = sectornr - eccline->start_sector;
786 bpos = 0;
787 while (buf_len) {
788 len = MIN(buf_len, sector_size);
789 if (eccsect == ump->packet_size) {
790 udf_puteccline(eccline);
791 eccline = udf_geteccline(ump, sectornr, 0);
792 eccsect = sectornr - eccline->start_sector;
793 }
794 bit = (uint64_t) 1 << eccsect;
795 error = eccline->error & bit ? EIO : 0;
796 if (eccline->present & bit) {
797 src = (uint8_t *) eccline->blob +
798 eccsect * sector_size;
799 dst = (uint8_t *) buf->b_data + bpos;
800 if (!error)
801 memcpy(dst, src, len);
802 nestiobuf_done(buf, len, error);
803 } else {
804 eccline->readin |= bit;
805 KASSERT(eccline->bufs[eccsect] == NULL);
806 eccline->bufs[eccsect] = buf;
807 eccline->bufs_bpos[eccsect] = bpos;
808 eccline->bufs_len[eccsect] = len;
809 }
810 bpos += sector_size;
811 eccsect++;
812 sectornr++;
813 buf_len -= len;
814 }
815 udf_puteccline(eccline);
816 return;
817 }
818
819 if (queue == UDF_SHED_WRITING) {
820 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw WRITE %p : sector %d "
821 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
822 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
823 buf->b_resid, buf->b_bcount, buf->b_bufsize));
824 /* if we have FIDs fixup using buffer's sector number(s) */
825 if (buf->b_udf_c_type == UDF_C_FIDS) {
826 panic("UDF_C_FIDS in SHED_WRITING!\n");
827 #if 0
828 buf_len = buf->b_bcount;
829 sectornr = our_sectornr;
830 bpos = 0;
831 while (buf_len) {
832 len = MIN(buf_len, sector_size);
833 fidblk = (uint8_t *) buf->b_data + bpos;
834 udf_fixup_fid_block(fidblk, sector_size,
835 0, len, sectornr);
836 sectornr++;
837 bpos += len;
838 buf_len -= len;
839 }
840 #endif
841 }
842 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
843
844 /* copy parts into the bufs and set for writing */
845 buf_len = buf->b_bcount;
846 sectornr = our_sectornr;
847 eccline = udf_geteccline(ump, sectornr, 0);
848 eccsect = sectornr - eccline->start_sector;
849 bpos = 0;
850 while (buf_len) {
851 len = MIN(buf_len, sector_size);
852 if (eccsect == ump->packet_size) {
853 udf_puteccline(eccline);
854 eccline = udf_geteccline(ump, sectornr, 0);
855 eccsect = sectornr - eccline->start_sector;
856 }
857 bit = (uint64_t) 1 << eccsect;
858 KASSERT((eccline->readin & bit) == 0);
859 eccline->present |= bit;
860 eccline->dirty |= bit;
861 if (eccline->bufs[eccsect]) {
862 /* old callback still pending */
863 nestiobuf_done(eccline->bufs[eccsect],
864 eccline->bufs_len[eccsect],
865 0);
866 eccline->bufs[eccsect] = NULL;
867 }
868
869 src = (uint8_t *) buf->b_data + bpos;
870 dst = (uint8_t *) eccline->blob + eccsect * sector_size;
871 if (len != sector_size)
872 memset(dst, 0, sector_size);
873 memcpy(dst, src, len);
874
875 /* note that its finished for this extent */
876 eccline->bufs[eccsect] = NULL;
877 nestiobuf_done(buf, len, 0);
878
879 bpos += sector_size;
880 eccsect++;
881 sectornr++;
882 buf_len -= len;
883 }
884 udf_puteccline(eccline);
885 return;
886
887 }
888
889 /* sequential writing */
890 KASSERT(queue == UDF_SHED_SEQWRITING);
891 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw SEQWRITE %p : sector XXXX "
892 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
893 buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
894 buf->b_bufsize));
895 /*
896 * Buffers should not have been allocated to disc addresses yet on
897 * this queue. Note that a buffer can get multiple extents allocated.
898 * Note that it *looks* like the normal writing but its different in
899 * the details.
900 *
901 * lmapping contains lb_num relative to base partition.
902 *
903 * XXX should we try to claim/organize the allocated memory to
904 * block-aligned pieces?
905 */
906 mutex_enter(&priv->seqwrite_mutex);
907
908 lmapping = ump->la_lmapping;
909 node_ad_cpy = ump->la_node_ad_cpy;
910
911 /* logically allocate buf and map it in the file */
912 udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
913
914 /* if we have FIDs, fixup using the new allocation table */
915 if (buf->b_udf_c_type == UDF_C_FIDS) {
916 buf_len = buf->b_bcount;
917 bpos = 0;
918 lmappos = lmapping;
919 while (buf_len) {
920 sectornr = *lmappos++;
921 len = MIN(buf_len, sector_size);
922 fidblk = (uint8_t *) buf->b_data + bpos;
923 udf_fixup_fid_block(fidblk, sector_size,
924 0, len, sectornr);
925 bpos += len;
926 buf_len -= len;
927 }
928 }
929 if (buf->b_udf_c_type == UDF_C_METADATA_SBM) {
930 if (buf->b_lblkno == 0) {
931 /* update the tag location inside */
932 tag = (struct desc_tag *) buf->b_data;
933 tag->tag_loc = udf_rw32(*lmapping);
934 udf_validate_tag_and_crc_sums(buf->b_data);
935 }
936 }
937 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
938
939 /*
940 * Translate new mappings in lmapping to pmappings.
941 * pmapping to contain lb_nums as used for disc adressing.
942 */
943 pmapping = ump->la_pmapping;
944 sectors = (buf->b_bcount + sector_size -1) / sector_size;
945 udf_translate_vtop_list(ump, sectors, vpart_num, lmapping, pmapping);
946
947 /* copy parts into the bufs and set for writing */
948 pmappos = pmapping;
949 buf_len = buf->b_bcount;
950 sectornr = *pmappos++;
951 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
952 eccsect = sectornr - eccline->start_sector;
953 bpos = 0;
954 while (buf_len) {
955 len = MIN(buf_len, sector_size);
956 eccsect = sectornr - eccline->start_sector;
957 if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
958 eccline->flags |= ECC_SEQWRITING;
959 udf_puteccline(eccline);
960 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
961 eccsect = sectornr - eccline->start_sector;
962 }
963 bit = (uint64_t) 1 << eccsect;
964 KASSERT((eccline->readin & bit) == 0);
965 eccline->present |= bit;
966 eccline->dirty |= bit;
967 eccline->bufs[eccsect] = NULL;
968
969 src = (uint8_t *) buf->b_data + bpos;
970 dst = (uint8_t *)
971 eccline->blob + eccsect * sector_size;
972 if (len != sector_size)
973 memset(dst, 0, sector_size);
974 memcpy(dst, src, len);
975
976 /* note that its finished for this extent */
977 nestiobuf_done(buf, len, 0);
978
979 bpos += sector_size;
980 sectornr = *pmappos++;
981 buf_len -= len;
982 }
983 eccline->flags |= ECC_SEQWRITING;
984 udf_puteccline(eccline);
985 mutex_exit(&priv->seqwrite_mutex);
986 }
987
988 /* --------------------------------------------------------------------- */
989
990 static void
991 udf_shedule_read_callback(struct buf *buf)
992 {
993 struct udf_eccline *eccline = BTOE(buf);
994 struct udf_mount *ump = eccline->ump;
995 uint64_t bit;
996 uint8_t *src, *dst;
997 int sector_size = ump->discinfo.sector_size;
998 int error, i, len;
999
1000 DPRINTF(ECCLINE, ("read callback called\n"));
1001 /* post process read action */
1002 error = buf->b_error;
1003 for (i = 0; i < ump->packet_size; i++) {
1004 bit = (uint64_t) 1 << i;
1005 src = (uint8_t *) buf->b_data + i * sector_size;
1006 dst = (uint8_t *) eccline->blob + i * sector_size;
1007 if (eccline->present & bit)
1008 continue;
1009 eccline->present |= bit;
1010 if (error)
1011 eccline->error |= bit;
1012 if (eccline->bufs[i]) {
1013 dst = (uint8_t *) eccline->bufs[i]->b_data +
1014 eccline->bufs_bpos[i];
1015 len = eccline->bufs_len[i];
1016 if (!error)
1017 memcpy(dst, src, len);
1018 nestiobuf_done(eccline->bufs[i], len, error);
1019 eccline->bufs[i] = NULL;
1020 }
1021
1022 }
1023 KASSERT(buf->b_data == eccline->blob);
1024 KASSERT(eccline->present == ((uint64_t) 1 << ump->packet_size)-1);
1025
1026 /*
1027 * XXX TODO what to do on read errors? read in all sectors
1028 * synchronously and allocate a sparable entry?
1029 */
1030
1031 wakeup(eccline);
1032 udf_puteccline(eccline);
1033 DPRINTF(ECCLINE, ("read callback finished\n"));
1034 }
1035
1036
1037 static void
1038 udf_shedule_write_callback(struct buf *buf)
1039 {
1040 struct udf_eccline *eccline = BTOE(buf);
1041 struct udf_mount *ump = eccline->ump;
1042 uint64_t bit;
1043 int error, i, len;
1044
1045 DPRINTF(ECCLINE, ("write callback called\n"));
1046 /* post process write action */
1047 error = buf->b_error;
1048 for (i = 0; i < ump->packet_size; i++) {
1049 bit = (uint64_t) 1 << i;
1050 if ((eccline->dirty & bit) == 0)
1051 continue;
1052 if (error) {
1053 eccline->error |= bit;
1054 } else {
1055 eccline->dirty &= ~bit;
1056 }
1057 if (eccline->bufs[i]) {
1058 len = eccline->bufs_len[i];
1059 nestiobuf_done(eccline->bufs[i], len, error);
1060 eccline->bufs[i] = NULL;
1061 }
1062 }
1063 KASSERT(eccline->dirty == 0);
1064
1065 KASSERT(error == 0);
1066 /*
1067 * XXX TODO on write errors allocate a sparable entry
1068 */
1069
1070 wakeup(eccline);
1071 udf_puteccline(eccline);
1072 }
1073
1074
1075 static void
1076 udf_issue_eccline(struct udf_eccline *eccline, int queued_on)
1077 {
1078 struct udf_mount *ump = eccline->ump;
1079 struct strat_private *priv = PRIV(ump);
1080 struct buf *buf, *nestbuf;
1081 uint64_t bit, allbits = ((uint64_t) 1 << ump->packet_size)-1;
1082 uint32_t start;
1083 int sector_size = ump->discinfo.sector_size;
1084 int blks = sector_size / DEV_BSIZE;
1085 int i;
1086
1087 if (queued_on == UDF_SHED_READING) {
1088 DPRINTF(SHEDULE, ("udf_issue_eccline reading : "));
1089 /* read all bits that are not yet present */
1090 eccline->readin = (~eccline->present) & allbits;
1091 KASSERT(eccline->readin);
1092 start = eccline->start_sector;
1093 buf = eccline->buf;
1094 buf_init(buf);
1095 buf->b_flags = B_READ | B_ASYNC;
1096 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1097 buf->b_oflags = 0;
1098 buf->b_iodone = udf_shedule_read_callback;
1099 buf->b_data = eccline->blob;
1100 buf->b_bcount = ump->packet_size * sector_size;
1101 buf->b_resid = buf->b_bcount;
1102 buf->b_bufsize = buf->b_bcount;
1103 buf->b_private = eccline;
1104 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1105 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1106 buf->b_proc = NULL;
1107
1108 if (eccline->present != 0) {
1109 for (i = 0; i < ump->packet_size; i++) {
1110 bit = (uint64_t) 1 << i;
1111 if (eccline->present & bit) {
1112 nestiobuf_done(buf, sector_size, 0);
1113 continue;
1114 }
1115 nestbuf = getiobuf(NULL, true);
1116 nestiobuf_setup(buf, nestbuf, i * sector_size,
1117 sector_size);
1118 /* adjust blocknumber to read */
1119 nestbuf->b_blkno = buf->b_blkno + i*blks;
1120 nestbuf->b_rawblkno = buf->b_rawblkno + i*blks;
1121
1122 DPRINTF(SHEDULE, ("sector %d ",
1123 start + i));
1124 /* call asynchronous */
1125 VOP_STRATEGY(ump->devvp, nestbuf);
1126 }
1127 DPRINTF(SHEDULE, ("\n"));
1128 return;
1129 }
1130 } else {
1131 /* write or seqwrite */
1132 DPRINTF(SHEDULE, ("udf_issue_eccline writing or seqwriting : "));
1133 if (eccline->present != allbits) {
1134 /* requeue to read-only */
1135 DPRINTF(SHEDULE, ("\n\t-> not complete, requeue to "
1136 "reading\n"));
1137 udf_push_eccline(eccline, UDF_SHED_READING);
1138 return;
1139 }
1140 start = eccline->start_sector;
1141 buf = eccline->buf;
1142 buf_init(buf);
1143 buf->b_flags = B_WRITE | B_ASYNC;
1144 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1145 buf->b_oflags = 0;
1146 buf->b_iodone = udf_shedule_write_callback;
1147 buf->b_data = eccline->blob;
1148 buf->b_bcount = ump->packet_size * sector_size;
1149 buf->b_resid = buf->b_bcount;
1150 buf->b_bufsize = buf->b_bcount;
1151 buf->b_private = eccline;
1152 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1153 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1154 buf->b_proc = NULL;
1155 }
1156
1157 mutex_exit(&priv->discstrat_mutex);
1158 /* call asynchronous */
1159 DPRINTF(SHEDULE, ("sector %d for %d\n",
1160 start, ump->packet_size));
1161 VOP_STRATEGY(ump->devvp, buf);
1162 mutex_enter(&priv->discstrat_mutex);
1163 }
1164
1165
1166 static void
1167 udf_discstrat_thread(void *arg)
1168 {
1169 struct udf_mount *ump = (struct udf_mount *) arg;
1170 struct strat_private *priv = PRIV(ump);
1171 struct udf_eccline *eccline;
1172 struct timespec now, *last;
1173 int new_queue, wait, work;
1174
1175 work = 1;
1176 mutex_enter(&priv->discstrat_mutex);
1177 priv->num_floating = 0;
1178 while (priv->run_thread || work || priv->num_floating) {
1179 /* process the current selected queue */
1180 /* maintenance: free exess ecclines */
1181 while (priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE) {
1182 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1183 KASSERT(eccline);
1184 KASSERT(eccline->refcnt == 0);
1185 DPRINTF(ECCLINE, ("Removing entry from free list\n"));
1186 udf_dispose_eccline(eccline);
1187 }
1188
1189 /* get our time */
1190 vfs_timestamp(&now);
1191 last = &priv->last_queued[priv->cur_queue];
1192
1193 /* don't shedule too quickly when there is only one */
1194 if (priv->cur_queue == UDF_SHED_WRITING) {
1195 if (priv->num_queued[priv->cur_queue] <= 2) {
1196 if (now.tv_sec - last->tv_sec < 4) {
1197 /* wait some time */
1198 cv_timedwait(&priv->discstrat_cv,
1199 &priv->discstrat_mutex, hz);
1200 continue;
1201 }
1202 }
1203 }
1204
1205 /* get our line */
1206 eccline = udf_pop_eccline(priv, priv->cur_queue);
1207 if (eccline) {
1208 wait = 0;
1209 new_queue = priv->cur_queue;
1210 DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n"));
1211
1212 /* complete the `get' by locking and refcounting it */
1213 UDF_LOCK_ECCLINE(eccline);
1214 eccline->refcnt++;
1215
1216 udf_issue_eccline(eccline, priv->cur_queue);
1217 } else {
1218 wait = 1;
1219 /* check if we can/should switch */
1220 new_queue = priv->cur_queue;
1221 if (BUFQ_PEEK(priv->queues[UDF_SHED_READING]))
1222 new_queue = UDF_SHED_READING;
1223 if (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]))
1224 new_queue = UDF_SHED_WRITING;
1225 if (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]))
1226 new_queue = UDF_SHED_SEQWRITING;
1227
1228 /* dont switch seqwriting too fast */
1229 if (priv->cur_queue == UDF_SHED_READING) {
1230 if (now.tv_sec - last->tv_sec < 1)
1231 new_queue = priv->cur_queue;
1232 }
1233 if (priv->cur_queue == UDF_SHED_WRITING) {
1234 if (now.tv_sec - last->tv_sec < 2)
1235 new_queue = priv->cur_queue;
1236 }
1237 if (priv->cur_queue == UDF_SHED_SEQWRITING) {
1238 if (now.tv_sec - last->tv_sec < 4)
1239 new_queue = priv->cur_queue;
1240 }
1241 }
1242
1243 /* give room */
1244 mutex_exit(&priv->discstrat_mutex);
1245
1246 if (new_queue != priv->cur_queue) {
1247 wait = 0;
1248 DPRINTF(SHEDULE, ("switching from %d to %d\n",
1249 priv->cur_queue, new_queue));
1250 priv->cur_queue = new_queue;
1251 }
1252 mutex_enter(&priv->discstrat_mutex);
1253
1254 /* wait for more if needed */
1255 if (wait)
1256 cv_timedwait(&priv->discstrat_cv,
1257 &priv->discstrat_mutex, hz); /* /8 */
1258
1259 work = (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL);
1260 work |= (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL);
1261 work |= (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL);
1262
1263 DPRINTF(PARANOIA, ("work : (%d, %d, %d) -> work %d, float %d\n",
1264 (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL),
1265 (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL),
1266 (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL),
1267 work, priv->num_floating));
1268 }
1269
1270 mutex_exit(&priv->discstrat_mutex);
1271
1272 /* tear down remaining ecclines */
1273 mutex_enter(&priv->discstrat_mutex);
1274 KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0);
1275 KASSERT(priv->num_queued[UDF_SHED_READING] == 0);
1276 KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0);
1277 KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0);
1278
1279 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_IDLE]) == NULL);
1280 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_READING]) == NULL);
1281 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) == NULL);
1282 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
1283 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1284 while (eccline) {
1285 udf_dispose_eccline(eccline);
1286 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1287 }
1288 KASSERT(priv->num_queued[UDF_SHED_FREE] == 0);
1289 mutex_exit(&priv->discstrat_mutex);
1290
1291 priv->thread_finished = 1;
1292 wakeup(&priv->run_thread);
1293 kthread_exit(0);
1294 /* not reached */
1295 }
1296
1297 /* --------------------------------------------------------------------- */
1298
1299 /*
1300 * Buffer memory pool allocator.
1301 */
1302
1303 static void *
1304 ecclinepool_page_alloc(struct pool *pp, int flags)
1305 {
1306 return (void *)uvm_km_alloc(kernel_map,
1307 MAXBSIZE, MAXBSIZE,
1308 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
1309 | UVM_KMF_WIRED /* UVM_KMF_PAGABLE? */);
1310 }
1311
1312 static void
1313 ecclinepool_page_free(struct pool *pp, void *v)
1314 {
1315 uvm_km_free(kernel_map, (vaddr_t)v, MAXBSIZE, UVM_KMF_WIRED);
1316 }
1317
/*
 * Pool allocator descriptor handed to pool_init() for the eccline blob
 * pool.  The page size matches the MAXBSIZE chunks that
 * ecclinepool_page_alloc()/ecclinepool_page_free() allocate and release.
 */
static struct pool_allocator ecclinepool_allocator = {
	.pa_alloc = ecclinepool_page_alloc,
	.pa_free  = ecclinepool_page_free,
	.pa_pagesz = MAXBSIZE,
};
1323
1324
1325 static void
1326 udf_discstrat_init_rmw(struct udf_strat_args *args)
1327 {
1328 struct udf_mount *ump = args->ump;
1329 struct strat_private *priv = PRIV(ump);
1330 uint32_t lb_size, blobsize, hashline;
1331 int i;
1332
1333 KASSERT(ump);
1334 KASSERT(ump->logical_vol);
1335 KASSERT(priv == NULL);
1336
1337 lb_size = udf_rw32(ump->logical_vol->lb_size);
1338 blobsize = ump->packet_size * lb_size;
1339 KASSERT(lb_size > 0);
1340 KASSERT(ump->packet_size <= 64);
1341
1342 /* initialise our memory space */
1343 ump->strategy_private = malloc(sizeof(struct strat_private),
1344 M_UDFTEMP, M_WAITOK);
1345 priv = ump->strategy_private;
1346 memset(priv, 0 , sizeof(struct strat_private));
1347
1348 /* initialise locks */
1349 cv_init(&priv->discstrat_cv, "udfstrat");
1350 mutex_init(&priv->discstrat_mutex, MUTEX_DRIVER, IPL_BIO);
1351 mutex_init(&priv->seqwrite_mutex, MUTEX_DEFAULT, IPL_NONE);
1352
1353 /* initialise struct eccline pool */
1354 pool_init(&priv->eccline_pool, sizeof(struct udf_eccline),
1355 0, 0, 0, "udf_eccline_pool", NULL, IPL_NONE);
1356
1357 /* initialise eccline blob pool */
1358 pool_init(&priv->ecclineblob_pool, blobsize,
1359 0,0,0, "udf_eccline_blob", &ecclinepool_allocator, IPL_NONE);
1360
1361 /* initialise main queues */
1362 for (i = 0; i < UDF_SHED_MAX; i++) {
1363 priv->num_queued[i] = 0;
1364 vfs_timestamp(&priv->last_queued[i]);
1365 }
1366 bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
1367 BUFQ_SORT_RAWBLOCK);
1368 bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
1369 BUFQ_SORT_RAWBLOCK);
1370 bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "disksort", 0);
1371
1372 /* initialise administrative queues */
1373 bufq_alloc(&priv->queues[UDF_SHED_IDLE], "fcfs", 0);
1374 bufq_alloc(&priv->queues[UDF_SHED_FREE], "fcfs", 0);
1375
1376 for (hashline = 0; hashline < UDF_ECCBUF_HASHSIZE; hashline++) {
1377 LIST_INIT(&priv->eccline_hash[hashline]);
1378 }
1379
1380 /* create our disk strategy thread */
1381 priv->cur_queue = UDF_SHED_READING;
1382 priv->thread_finished = 0;
1383 priv->run_thread = 1;
1384 if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
1385 udf_discstrat_thread, ump, &priv->queue_lwp,
1386 "%s", "udf_rw")) {
1387 panic("fork udf_rw");
1388 }
1389 }
1390
1391
1392 static void
1393 udf_discstrat_finish_rmw(struct udf_strat_args *args)
1394 {
1395 struct udf_mount *ump = args->ump;
1396 struct strat_private *priv = PRIV(ump);
1397 int error;
1398
1399 if (ump == NULL)
1400 return;
1401
1402 /* stop our sheduling thread */
1403 KASSERT(priv->run_thread == 1);
1404 priv->run_thread = 0;
1405 wakeup(priv->queue_lwp);
1406 while (!priv->thread_finished) {
1407 error = tsleep(&priv->run_thread, PRIBIO+1,
1408 "udfshedfin", hz);
1409 }
1410 /* kthread should be finished now */
1411
1412 /* cleanup our pools */
1413 pool_destroy(&priv->eccline_pool);
1414 pool_destroy(&priv->ecclineblob_pool);
1415
1416 cv_destroy(&priv->discstrat_cv);
1417 mutex_destroy(&priv->discstrat_mutex);
1418 mutex_destroy(&priv->seqwrite_mutex);
1419
1420 /* free our private space */
1421 free(ump->strategy_private, M_UDFTEMP);
1422 ump->strategy_private = NULL;
1423 }
1424
1425 /* --------------------------------------------------------------------- */
1426
/*
 * Strategy descriptor exporting the read-modify-write implementation.
 * This is a positional initializer, so the entries have to stay in the
 * member order of struct udf_strategy (declared outside this file).
 */
struct udf_strategy udf_strat_rmw =
{
	udf_create_nodedscr_rmw,
	udf_free_nodedscr_rmw,
	udf_read_nodedscr_rmw,
	udf_write_nodedscr_rmw,
	udf_queuebuf_rmw,
	udf_discstrat_init_rmw,		/* sets up private data and thread */
	udf_discstrat_finish_rmw	/* stops thread, frees private data */
};
1437
1438