udf_strat_rmw.c revision 1.6 1 /* $NetBSD: udf_strat_rmw.c,v 1.6 2008/07/28 19:41:13 reinoud Exp $ */
2
3 /*
4 * Copyright (c) 2006, 2008 Reinoud Zandijk
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 */
28
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.6 2008/07/28 19:41:13 reinoud Exp $");
32 #endif /* not lint */
33
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_quota.h"
37 #include "opt_compat_netbsd.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysctl.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <miscfs/genfs/genfs_node.h>
48 #include <sys/mount.h>
49 #include <sys/buf.h>
50 #include <sys/file.h>
51 #include <sys/device.h>
52 #include <sys/disklabel.h>
53 #include <sys/ioctl.h>
54 #include <sys/malloc.h>
55 #include <sys/dirent.h>
56 #include <sys/stat.h>
57 #include <sys/conf.h>
58 #include <sys/kauth.h>
59 #include <sys/kthread.h>
60 #include <dev/clock_subr.h>
61
62 #include <fs/udf/ecma167-udf.h>
63 #include <fs/udf/udf_mount.h>
64
65 #if defined(_KERNEL_OPT)
66 #include "opt_udf.h"
67 #endif
68
69 #include "udf.h"
70 #include "udf_subr.h"
71 #include "udf_bswap.h"
72
73
/*
 * Accessors that cast the opaque private pointer of a generic kernel object
 * to the UDF structure stored there: a vnode's v_data, a mount's
 * strategy_private and a buf's b_private.
 */
#define VTOI(vp)	((struct udf_node *) (vp)->v_data)
#define PRIV(mnt)	((struct strat_private *) (mnt)->strategy_private)
#define BTOE(bp)	((struct udf_eccline *) ((bp)->b_private))
77
78 /* --------------------------------------------------------------------- */
79
/* maximum sectors per ECC line; sizes the per-sector arrays in udf_eccline */
80 #define UDF_MAX_PACKET_SIZE 64 /* do NOT change this */
81
82 /*
 * scheduler states; used as index into the per-state queue arrays of
 * strat_private. An eccline's queued_on value of 0 means "on no queue".
 */
83 #define UDF_SHED_MAX 6
84 #define UDF_SHED_READING 1
85 #define UDF_SHED_WRITING 2
86 #define UDF_SHED_SEQWRITING 3
87 #define UDF_SHED_IDLE 4 /* resting */
88 #define UDF_SHED_FREE 5 /* recyclable */
89
90 /* flags kept in udf_eccline.flags */
91 #define ECC_LOCKED 0x01 /* prevent access */
92 #define ECC_WANTED 0x02 /* trying access */
93 #define ECC_SEQWRITING 0x04 /* sequential queue */
94 #define ECC_FLOATING 0x08 /* not queued yet */
95
96
97 TAILQ_HEAD(ecclineq, udf_eccline);
/*
 * Cached state of one ECC line (packet) of the disc. The four bitmaps hold
 * one bit per sector of the line, bit i describing sector start_sector + i.
 */
98 struct udf_eccline {
99 struct udf_mount *ump; /* mount this line belongs to */
100 uint64_t present; /* bitmap: sector contents valid in blob; preserve these */
101 uint64_t readin; /* bitmap: sectors requested to be read in */
102 uint64_t dirty; /* bitmap: sectors modified and not yet written */
103 uint64_t error; /* bitmap: sectors whose I/O reported an error */
104 uint32_t refcnt; /* number of references held on this line */
105
106 uint32_t flags; /* ECC_* flags */
107 uint32_t start_sector; /* physical; first sector of the line */
108
109 struct buf *buf; /* iobuf used to queue the line; b_private points back */
110 void *blob; /* sector data of the whole line */
111
112 struct buf *bufs[UDF_MAX_PACKET_SIZE]; /* per sector: nested buf awaiting completion */
113 uint32_t bufs_bpos[UDF_MAX_PACKET_SIZE]; /* per sector: byte offset in that buf's data */
114 int bufs_len[UDF_MAX_PACKET_SIZE]; /* per sector: byte count for that buf */
115
116 int queued_on; /* on which BUFQ list; 0 when on none */
117 LIST_ENTRY(udf_eccline) hashchain; /* on sector lookup */
118 };
119
120
/* Per-mount state of this strategy; reached through PRIV(ump). */
121 struct strat_private {
122 lwp_t *queue_lwp; /* presumably the scheduling thread; not used in this part */
123 kcondvar_t discstrat_cv; /* to wait on */
124 kmutex_t discstrat_mutex; /* disc strategy; guards queues, counters and ecclines */
125 kmutex_t seqwrite_mutex; /* protect mappings */
126
127 int run_thread; /* thread control */
128 int thread_finished; /* thread control */
129 int cur_queue;
130
131 int num_floating; /* number of ecclines flagged ECC_FLOATING */
132 int num_queued[UDF_SHED_MAX]; /* number of ecclines on each queue */
133 struct bufq_state *queues[UDF_SHED_MAX]; /* one bufq per scheduler state */
134 struct timespec last_queued[UDF_SHED_MAX]; /* time of last push per queue */
135 struct disk_strategy old_strategy_setting;
136
137 struct pool eccline_pool; /* allocator for struct udf_eccline */
138 struct pool ecclineblob_pool; /* allocator for eccline data blobs */
139 LIST_HEAD(, udf_eccline) eccline_hash[UDF_ECCBUF_HASHSIZE]; /* lookup by line number */
140 };
141
142 /* --------------------------------------------------------------------- */
143
/* Shorthand for the eccline lock and unlock routines. */
#define UDF_LOCK_ECCLINE(line)		udf_lock_eccline(line)
#define UDF_UNLOCK_ECCLINE(line)	udf_unlock_eccline(line)
146
147 /* can be called with or without discstrat lock */
148 static void
149 udf_lock_eccline(struct udf_eccline *eccline)
150 {
151 struct strat_private *priv = PRIV(eccline->ump);
152 int waslocked, ret;
153
154 waslocked = mutex_owned(&priv->discstrat_mutex);
155 if (!waslocked)
156 mutex_enter(&priv->discstrat_mutex);
157
158 /* wait until its unlocked first */
159 while (eccline->flags & ECC_LOCKED) {
160 eccline->flags |= ECC_WANTED;
161 ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex,
162 hz/8);
163 if (ret == EWOULDBLOCK)
164 DPRINTF(LOCKING, ("eccline lock helt, waiting for "
165 "release"));
166 }
167 eccline->flags |= ECC_LOCKED;
168 eccline->flags &= ~ECC_WANTED;
169
170 if (!waslocked)
171 mutex_exit(&priv->discstrat_mutex);
172 }
173
174
175 /* can be called with or without discstrat lock */
176 static void
177 udf_unlock_eccline(struct udf_eccline *eccline)
178 {
179 struct strat_private *priv = PRIV(eccline->ump);
180 int waslocked;
181
182 KASSERT(mutex_owned(&priv->discstrat_mutex));
183
184 waslocked = mutex_owned(&priv->discstrat_mutex);
185 if (!waslocked)
186 mutex_enter(&priv->discstrat_mutex);
187
188 eccline->flags &= ~ECC_LOCKED;
189 cv_broadcast(&priv->discstrat_cv);
190
191 if (!waslocked)
192 mutex_exit(&priv->discstrat_mutex);
193 }
194
195
196 /* NOTE discstrat_mutex should be held! */
197 static void
198 udf_dispose_eccline(struct udf_eccline *eccline)
199 {
200 struct strat_private *priv = PRIV(eccline->ump);
201 struct buf *ret;
202
203 KASSERT(mutex_owned(&priv->discstrat_mutex));
204
205 KASSERT(eccline->refcnt == 0);
206 KASSERT(eccline->dirty == 0);
207
208 DPRINTF(ECCLINE, ("dispose eccline with start sector %d, "
209 "present %0"PRIx64"\n", eccline->start_sector,
210 eccline->present));
211
212 if (eccline->queued_on) {
213 ret = BUFQ_CANCEL(priv->queues[eccline->queued_on], eccline->buf);
214 KASSERT(ret == eccline->buf);
215 priv->num_queued[eccline->queued_on]--;
216 }
217 LIST_REMOVE(eccline, hashchain);
218
219 if (eccline->flags & ECC_FLOATING) {
220 eccline->flags &= ~ECC_FLOATING;
221 priv->num_floating--;
222 }
223
224 putiobuf(eccline->buf);
225 pool_put(&priv->ecclineblob_pool, eccline->blob);
226 pool_put(&priv->eccline_pool, eccline);
227 }
228
229
230 /* NOTE discstrat_mutex should be held! */
231 static void
232 udf_push_eccline(struct udf_eccline *eccline, int newqueue)
233 {
234 struct strat_private *priv = PRIV(eccline->ump);
235 struct buf *ret;
236 int curqueue;
237
238 KASSERT(mutex_owned(&priv->discstrat_mutex));
239
240 DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue));
241
242 /* requeue */
243 curqueue = eccline->queued_on;
244 if (curqueue) {
245 ret = BUFQ_CANCEL(priv->queues[curqueue], eccline->buf);
246
247 DPRINTF(PARANOIA, ("push_eccline BUFQ_CANCEL returned %p when "
248 "requested to remove %p from queue %d\n", ret,
249 eccline->buf, curqueue));
250 #ifdef DIAGNOSTIC
251 if (ret == NULL) {
252 int i;
253
254 printf("udf_push_eccline: bufq_cancel can't find "
255 "buffer; dumping queues\n");
256 for (i = 1; i < UDF_SHED_MAX; i++) {
257 printf("queue %d\n\t", i);
258 ret = BUFQ_GET(priv->queues[i]);
259 while (ret) {
260 printf("%p ", ret);
261 if (ret == eccline->buf)
262 printf("[<-] ");
263 ret = BUFQ_GET(priv->queues[i]);
264 }
265 printf("\n");
266 }
267 panic("fatal queue bug; exit");
268 }
269 #endif
270
271 KASSERT(ret == eccline->buf);
272 priv->num_queued[curqueue]--;
273 }
274
275 BUFQ_PUT(priv->queues[newqueue], eccline->buf);
276 eccline->queued_on = newqueue;
277 priv->num_queued[newqueue]++;
278 vfs_timestamp(&priv->last_queued[newqueue]);
279
280 if (eccline->flags & ECC_FLOATING) {
281 eccline->flags &= ~ECC_FLOATING;
282 priv->num_floating--;
283 }
284
285 if ((newqueue != UDF_SHED_FREE) && (newqueue != UDF_SHED_IDLE))
286 cv_signal(&priv->discstrat_cv);
287 }
288
289
290 static struct udf_eccline *
291 udf_pop_eccline(struct strat_private *priv, int queued_on)
292 {
293 struct udf_eccline *eccline;
294 struct buf *buf;
295
296 KASSERT(mutex_owned(&priv->discstrat_mutex));
297
298 buf = BUFQ_GET(priv->queues[queued_on]);
299 if (!buf) {
300 KASSERT(priv->num_queued[queued_on] == 0);
301 return NULL;
302 }
303
304 eccline = BTOE(buf);
305 KASSERT(eccline->queued_on == queued_on);
306 eccline->queued_on = 0;
307 priv->num_queued[queued_on]--;
308
309 if (eccline->flags & ECC_FLOATING)
310 panic("popping already marked floating eccline");
311 eccline->flags |= ECC_FLOATING;
312 priv->num_floating++;
313
314 DPRINTF(PARANOIA, ("DEBUG: buf %p popped from queue %d\n",
315 eccline->buf, queued_on));
316
317 return eccline;
318 }
319
320
321 static struct udf_eccline *
322 udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags)
323 {
324 struct strat_private *priv = PRIV(ump);
325 struct udf_eccline *eccline;
326 uint32_t start_sector, lb_size, blobsize;
327 uint8_t *eccline_blob;
328 int line, line_offset;
329 int num_busy, ret;
330
331 line_offset = sector % ump->packet_size;
332 start_sector = sector - line_offset;
333 line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK;
334
335 mutex_enter(&priv->discstrat_mutex);
336
337 retry:
338 DPRINTF(ECCLINE, ("get line sector %d, line %d\n", sector, line));
339 LIST_FOREACH(eccline, &priv->eccline_hash[line], hashchain) {
340 if (eccline->start_sector == start_sector) {
341 DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n",
342 eccline->start_sector));
343
344 UDF_LOCK_ECCLINE(eccline);
345 /* move from freelist (!) */
346 if (eccline->queued_on == UDF_SHED_FREE) {
347 DPRINTF(ECCLINE, ("was on freelist\n"));
348 KASSERT(eccline->refcnt == 0);
349 udf_push_eccline(eccline, UDF_SHED_IDLE);
350 }
351 eccline->refcnt++;
352 mutex_exit(&priv->discstrat_mutex);
353 return eccline;
354 }
355 }
356
357 DPRINTF(ECCLINE, ("\tnot found in eccline cache\n"));
358 /* not found in eccline cache */
359
360 lb_size = udf_rw32(ump->logical_vol->lb_size);
361 blobsize = ump->packet_size * lb_size;
362
363 /* dont allow too many pending requests */
364 DPRINTF(ECCLINE, ("\tallocating new eccline\n"));
365 num_busy = (priv->num_queued[UDF_SHED_SEQWRITING] + priv->num_floating);
366 if ((flags & ECC_SEQWRITING) && (num_busy > UDF_ECCLINE_MAXBUSY)) {
367 ret = cv_timedwait(&priv->discstrat_cv,
368 &priv->discstrat_mutex, hz/8);
369 goto retry;
370 }
371
372 eccline_blob = pool_get(&priv->ecclineblob_pool, PR_NOWAIT);
373 eccline = pool_get(&priv->eccline_pool, PR_NOWAIT);
374 if ((eccline_blob == NULL) || (eccline == NULL)) {
375 if (eccline_blob)
376 pool_put(&priv->ecclineblob_pool, eccline_blob);
377 if (eccline)
378 pool_put(&priv->eccline_pool, eccline);
379
380 /* out of memory for now; canibalise freelist */
381 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
382 if (eccline == NULL) {
383 /* serious trouble; wait and retry */
384 cv_timedwait(&priv->discstrat_cv,
385 &priv->discstrat_mutex, hz/8);
386 goto retry;
387 }
388 /* push back line if we're waiting for it */
389 if (eccline->flags & ECC_WANTED) {
390 udf_push_eccline(eccline, UDF_SHED_IDLE);
391 goto retry;
392 }
393
394 /* unlink this entry */
395 LIST_REMOVE(eccline, hashchain);
396
397 KASSERT(eccline->flags & ECC_FLOATING);
398
399 eccline_blob = eccline->blob;
400 memset(eccline, 0, sizeof(struct udf_eccline));
401 eccline->flags = ECC_FLOATING;
402 } else {
403 memset(eccline, 0, sizeof(struct udf_eccline));
404 eccline->flags = ECC_FLOATING;
405 priv->num_floating++;
406 }
407
408 eccline->queued_on = 0;
409 eccline->blob = eccline_blob;
410 eccline->buf = getiobuf(NULL, true);
411 eccline->buf->b_private = eccline; /* IMPORTANT */
412
413 /* initialise eccline blob */
414 memset(eccline->blob, 0, blobsize);
415
416 eccline->ump = ump;
417 eccline->present = eccline->readin = eccline->dirty = 0;
418 eccline->error = 0;
419 eccline->refcnt = 0;
420 eccline->start_sector = start_sector;
421
422 LIST_INSERT_HEAD(&priv->eccline_hash[line], eccline, hashchain);
423
424 /*
425 * TODO possible optimalisation for checking overlap with partitions
426 * to get a clue on future eccline usage
427 */
428 eccline->refcnt++;
429 UDF_LOCK_ECCLINE(eccline);
430
431 mutex_exit(&priv->discstrat_mutex);
432
433 return eccline;
434 }
435
436
437 static void
438 udf_puteccline(struct udf_eccline *eccline)
439 {
440 struct strat_private *priv = PRIV(eccline->ump);
441 struct udf_eccline *deccline;
442 struct udf_mount *ump = eccline->ump;
443 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
444 int newqueue, tries;
445
446 mutex_enter(&priv->discstrat_mutex);
447
448 /* clear directly all readin requests from present ones */
449 if (eccline->readin & eccline->present) {
450 /* clear all read bits that are already read in */
451 eccline->readin &= (~eccline->present) & allbits;
452 wakeup(eccline);
453 }
454
455 DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n",
456 eccline->start_sector, eccline->refcnt));
457
458 /* requeue */
459 newqueue = UDF_SHED_FREE;
460 if (eccline->refcnt > 1)
461 newqueue = UDF_SHED_IDLE;
462 if (eccline->flags & ECC_WANTED)
463 newqueue = UDF_SHED_IDLE;
464 if (eccline->dirty) {
465 newqueue = UDF_SHED_WRITING;
466 if (eccline->flags & ECC_SEQWRITING)
467 newqueue = UDF_SHED_SEQWRITING;
468 }
469
470 /* if we have active nodes */
471 if (eccline->refcnt > 1) {
472 /* we dont set it on seqwriting */
473 eccline->flags &= ~ECC_SEQWRITING;
474 }
475
476 /* if we need reading in or not all is yet present, queue reading */
477 if ((eccline->readin) || (eccline->present != allbits))
478 newqueue = UDF_SHED_READING;
479
480 /* reduce the number of kept free buffers */
481 tries = priv->num_queued[UDF_SHED_FREE] - UDF_ECCLINE_MAXFREE;
482 while (tries > 0 /* priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE */) {
483 deccline = udf_pop_eccline(priv, UDF_SHED_FREE);
484 KASSERT(deccline);
485 KASSERT(deccline->refcnt == 0);
486 if (deccline->flags & ECC_WANTED) {
487 udf_push_eccline(deccline, UDF_SHED_IDLE);
488 DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n"));
489 } else {
490 DPRINTF(ECCLINE, ("Removing entry from free list\n"));
491 udf_dispose_eccline(deccline);
492 }
493 tries--;
494 }
495
496 udf_push_eccline(eccline, newqueue);
497
498 KASSERT(eccline->refcnt >= 1);
499 eccline->refcnt--;
500 UDF_UNLOCK_ECCLINE(eccline);
501
502 mutex_exit(&priv->discstrat_mutex);
503 }
504
505 /* --------------------------------------------------------------------- */
506
507 static int
508 udf_create_nodedscr_rmw(struct udf_strat_args *args)
509 {
510 union dscrptr **dscrptr = &args->dscr;
511 struct udf_mount *ump = args->ump;
512 struct long_ad *icb = args->icb;
513 struct udf_eccline *eccline;
514 uint64_t bit;
515 uint32_t sectornr, lb_size, dummy;
516 uint8_t *mem;
517 int error, eccsect;
518
519 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
520 if (error)
521 return error;
522
523 lb_size = udf_rw32(ump->logical_vol->lb_size);
524
525 /* get our eccline */
526 eccline = udf_geteccline(ump, sectornr, 0);
527 eccsect = sectornr - eccline->start_sector;
528
529 bit = (uint64_t) 1 << eccsect;
530 eccline->readin &= ~bit; /* just in case */
531 eccline->present |= bit;
532 eccline->dirty &= ~bit; /* Err... euhm... clean? */
533
534 eccline->refcnt++;
535
536 /* clear space */
537 mem = ((uint8_t *) eccline->blob) + eccsect * lb_size;
538 memset(mem, 0, lb_size);
539
540 udf_puteccline(eccline);
541
542 *dscrptr = (union dscrptr *) mem;
543 return 0;
544 }
545
546
547 static void
548 udf_free_nodedscr_rmw(struct udf_strat_args *args)
549 {
550 struct udf_mount *ump = args->ump;
551 struct long_ad *icb = args->icb;
552 struct udf_eccline *eccline;
553 uint64_t bit;
554 uint32_t sectornr, dummy;
555 int error, eccsect;
556
557 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
558 if (error)
559 return;
560
561 /* get our eccline */
562 eccline = udf_geteccline(ump, sectornr, 0);
563 eccsect = sectornr - eccline->start_sector;
564
565 bit = (uint64_t) 1 << eccsect;
566 eccline->readin &= ~bit; /* just in case */
567
568 KASSERT(eccline->refcnt >= 1);
569 eccline->refcnt--;
570
571 udf_puteccline(eccline);
572 }
573
574
575 static int
576 udf_read_nodedscr_rmw(struct udf_strat_args *args)
577 {
578 union dscrptr **dscrptr = &args->dscr;
579 struct udf_mount *ump = args->ump;
580 struct long_ad *icb = args->icb;
581 struct udf_eccline *eccline;
582 uint64_t bit;
583 uint32_t sectornr, dummy;
584 uint8_t *pos;
585 int sector_size = ump->discinfo.sector_size;
586 int lb_size = udf_rw32(ump->logical_vol->lb_size);
587 int i, error, dscrlen, eccsect;
588
589 lb_size = lb_size;
590 KASSERT(sector_size == lb_size);
591 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
592 if (error)
593 return error;
594
595 /* get our eccline */
596 eccline = udf_geteccline(ump, sectornr, 0);
597 eccsect = sectornr - eccline->start_sector;
598
599 bit = (uint64_t) 1 << eccsect;
600 if ((eccline->present & bit) == 0) {
601 /* mark bit for readin */
602 eccline->readin |= bit;
603 eccline->refcnt++; /* prevent recycling */
604 KASSERT(eccline->bufs[eccsect] == NULL);
605 udf_puteccline(eccline);
606
607 /* wait for completion; XXX remodel to lock bit code */
608 error = 0;
609 while ((eccline->present & bit) == 0) {
610 tsleep(eccline, PRIBIO+1, "udflvdrd", hz/8);
611 if (eccline->error & bit) {
612 KASSERT(eccline->refcnt >= 1);
613 eccline->refcnt--; /* undo temp refcnt */
614 *dscrptr = NULL;
615 return EIO; /* XXX error code */
616 }
617 }
618
619 /* reget our line */
620 eccline = udf_geteccline(ump, sectornr, 0);
621 KASSERT(eccline->refcnt >= 1);
622 eccline->refcnt--; /* undo refcnt */
623 }
624
625 *dscrptr = (union dscrptr *)
626 (((uint8_t *) eccline->blob) + eccsect * sector_size);
627
628 /* code from read_phys_descr */
629 /* check if its a valid tag */
630 error = udf_check_tag(*dscrptr);
631 if (error) {
632 /* check if its an empty block */
633 pos = (uint8_t *) *dscrptr;
634 for (i = 0; i < sector_size; i++, pos++) {
635 if (*pos) break;
636 }
637 if (i == sector_size) {
638 /* return no error but with no dscrptr */
639 error = 0;
640 }
641 *dscrptr = NULL;
642 udf_puteccline(eccline);
643 return error;
644 }
645
646 /* calculate descriptor size */
647 dscrlen = udf_tagsize(*dscrptr, sector_size);
648 error = udf_check_tag_payload(*dscrptr, dscrlen);
649 if (error) {
650 *dscrptr = NULL;
651 udf_puteccline(eccline);
652 return error;
653 }
654
655 eccline->refcnt++;
656 udf_puteccline(eccline);
657
658 return 0;
659 }
660
661
662 static int
663 udf_write_nodedscr_rmw(struct udf_strat_args *args)
664 {
665 union dscrptr *dscrptr = args->dscr;
666 struct udf_mount *ump = args->ump;
667 struct long_ad *icb = args->icb;
668 struct udf_node *udf_node = args->udf_node;
669 struct udf_eccline *eccline;
670 uint64_t bit;
671 uint32_t sectornr, logsectornr, dummy;
672 // int waitfor = args->waitfor;
673 int sector_size = ump->discinfo.sector_size;
674 int lb_size = udf_rw32(ump->logical_vol->lb_size);
675 int error, eccsect;
676
677 lb_size = lb_size;
678 KASSERT(sector_size == lb_size);
679 sectornr = 0;
680 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
681 if (error)
682 return error;
683
684 /* add reference to the vnode to prevent recycling */
685 vhold(udf_node->vnode);
686
687 /* get our eccline */
688 eccline = udf_geteccline(ump, sectornr, 0);
689 eccsect = sectornr - eccline->start_sector;
690
691 bit = (uint64_t) 1 << eccsect;
692
693 /* old callback still pending? */
694 if (eccline->bufs[eccsect]) {
695 DPRINTF(WRITE, ("udf_write_nodedscr_rmw: writing descriptor"
696 " over buffer?\n"));
697 nestiobuf_done(eccline->bufs[eccsect],
698 eccline->bufs_len[eccsect],
699 0);
700 eccline->bufs[eccsect] = NULL;
701 }
702
703 /* set sector number in the descriptor and validate */
704 dscrptr = (union dscrptr *)
705 (((uint8_t *) eccline->blob) + eccsect * sector_size);
706 KASSERT(dscrptr == args->dscr);
707
708 logsectornr = udf_rw32(icb->loc.lb_num);
709 dscrptr->tag.tag_loc = udf_rw32(logsectornr);
710 udf_validate_tag_and_crc_sums(dscrptr);
711
712 udf_fixup_node_internals(ump, (uint8_t *) dscrptr, UDF_C_NODE);
713
714 /* set our flags */
715 KASSERT(eccline->present & bit);
716 eccline->dirty |= bit;
717
718 KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size);
719
720 udf_puteccline(eccline);
721
722 holdrele(udf_node->vnode);
723 udf_node->outstanding_nodedscr--;
724 if (udf_node->outstanding_nodedscr == 0) {
725 UDF_UNLOCK_NODE(udf_node, udf_node->i_flags & IN_CALLBACK_ULK);
726 wakeup(&udf_node->outstanding_nodedscr);
727 }
728
729 /* XXX waitfor not used */
730 return 0;
731 }
732
733
734 static void
735 udf_queuebuf_rmw(struct udf_strat_args *args)
736 {
737 struct udf_mount *ump = args->ump;
738 struct buf *buf = args->nestbuf;
739 struct desc_tag *tag;
740 struct strat_private *priv = PRIV(ump);
741 struct udf_eccline *eccline;
742 struct long_ad *node_ad_cpy;
743 uint64_t bit, *lmapping, *pmapping, *lmappos, *pmappos, blknr;
744 uint32_t buf_len, len, sectors, sectornr, our_sectornr;
745 uint32_t bpos;
746 uint16_t vpart_num;
747 uint8_t *fidblk, *src, *dst;
748 int sector_size = ump->discinfo.sector_size;
749 int blks = sector_size / DEV_BSIZE;
750 int eccsect, what, queue, error;
751
752 KASSERT(ump);
753 KASSERT(buf);
754 KASSERT(buf->b_iodone == nestiobuf_iodone);
755
756 blknr = buf->b_blkno;
757 our_sectornr = blknr / blks;
758
759 what = buf->b_udf_c_type;
760 queue = UDF_SHED_READING;
761 if ((buf->b_flags & B_READ) == 0) {
762 /* writing */
763 queue = UDF_SHED_SEQWRITING;
764 if (what == UDF_C_DSCR)
765 queue = UDF_SHED_WRITING;
766 if (what == UDF_C_NODE)
767 queue = UDF_SHED_WRITING;
768 }
769
770 if (queue == UDF_SHED_READING) {
771 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw READ %p : sector %d type %d,"
772 "b_resid %d, b_bcount %d, b_bufsize %d\n",
773 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
774 buf->b_resid, buf->b_bcount, buf->b_bufsize));
775
776 /* mark bits for reading */
777 buf_len = buf->b_bcount;
778 sectornr = our_sectornr;
779 eccline = udf_geteccline(ump, sectornr, 0);
780 eccsect = sectornr - eccline->start_sector;
781 bpos = 0;
782 while (buf_len) {
783 len = MIN(buf_len, sector_size);
784 if (eccsect == ump->packet_size) {
785 udf_puteccline(eccline);
786 eccline = udf_geteccline(ump, sectornr, 0);
787 eccsect = sectornr - eccline->start_sector;
788 }
789 bit = (uint64_t) 1 << eccsect;
790 error = eccline->error & bit ? EIO : 0;
791 if (eccline->present & bit) {
792 src = (uint8_t *) eccline->blob +
793 eccsect * sector_size;
794 dst = (uint8_t *) buf->b_data + bpos;
795 if (!error)
796 memcpy(dst, src, len);
797 nestiobuf_done(buf, len, error);
798 } else {
799 eccline->readin |= bit;
800 KASSERT(eccline->bufs[eccsect] == NULL);
801 eccline->bufs[eccsect] = buf;
802 eccline->bufs_bpos[eccsect] = bpos;
803 eccline->bufs_len[eccsect] = len;
804 }
805 bpos += sector_size;
806 eccsect++;
807 sectornr++;
808 buf_len -= len;
809 }
810 udf_puteccline(eccline);
811 return;
812 }
813
814 if (queue == UDF_SHED_WRITING) {
815 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw WRITE %p : sector %d "
816 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
817 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
818 buf->b_resid, buf->b_bcount, buf->b_bufsize));
819 /* if we have FIDs fixup using buffer's sector number(s) */
820 if (buf->b_udf_c_type == UDF_C_FIDS) {
821 panic("UDF_C_FIDS in SHED_WRITING!\n");
822 #if 0
823 buf_len = buf->b_bcount;
824 sectornr = our_sectornr;
825 bpos = 0;
826 while (buf_len) {
827 len = MIN(buf_len, sector_size);
828 fidblk = (uint8_t *) buf->b_data + bpos;
829 udf_fixup_fid_block(fidblk, sector_size,
830 0, len, sectornr);
831 sectornr++;
832 bpos += len;
833 buf_len -= len;
834 }
835 #endif
836 }
837 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
838
839 /* copy parts into the bufs and set for writing */
840 buf_len = buf->b_bcount;
841 sectornr = our_sectornr;
842 eccline = udf_geteccline(ump, sectornr, 0);
843 eccsect = sectornr - eccline->start_sector;
844 bpos = 0;
845 while (buf_len) {
846 len = MIN(buf_len, sector_size);
847 if (eccsect == ump->packet_size) {
848 udf_puteccline(eccline);
849 eccline = udf_geteccline(ump, sectornr, 0);
850 eccsect = sectornr - eccline->start_sector;
851 }
852 bit = (uint64_t) 1 << eccsect;
853 KASSERT((eccline->readin & bit) == 0);
854 eccline->present |= bit;
855 eccline->dirty |= bit;
856 if (eccline->bufs[eccsect]) {
857 /* old callback still pending */
858 nestiobuf_done(eccline->bufs[eccsect],
859 eccline->bufs_len[eccsect],
860 0);
861 eccline->bufs[eccsect] = NULL;
862 }
863
864 src = (uint8_t *) buf->b_data + bpos;
865 dst = (uint8_t *) eccline->blob + eccsect * sector_size;
866 if (len != sector_size)
867 memset(dst, 0, sector_size);
868 memcpy(dst, src, len);
869
870 /* note that its finished for this extent */
871 eccline->bufs[eccsect] = NULL;
872 nestiobuf_done(buf, len, 0);
873
874 bpos += sector_size;
875 eccsect++;
876 sectornr++;
877 buf_len -= len;
878 }
879 udf_puteccline(eccline);
880 return;
881
882 }
883
884 /* sequential writing */
885 KASSERT(queue == UDF_SHED_SEQWRITING);
886 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw SEQWRITE %p : sector XXXX "
887 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
888 buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
889 buf->b_bufsize));
890 /*
891 * Buffers should not have been allocated to disc addresses yet on
892 * this queue. Note that a buffer can get multiple extents allocated.
893 * Note that it *looks* like the normal writing but its different in
894 * the details.
895 *
896 * lmapping contains lb_num relative to base partition.
897 *
898 * XXX should we try to claim/organize the allocated memory to
899 * block-aligned pieces?
900 */
901 mutex_enter(&priv->seqwrite_mutex);
902
903 lmapping = ump->la_lmapping;
904 node_ad_cpy = ump->la_node_ad_cpy;
905
906 /* logically allocate buf and map it in the file */
907 udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
908
909 /* if we have FIDs, fixup using the new allocation table */
910 if (buf->b_udf_c_type == UDF_C_FIDS) {
911 buf_len = buf->b_bcount;
912 bpos = 0;
913 lmappos = lmapping;
914 while (buf_len) {
915 sectornr = *lmappos++;
916 len = MIN(buf_len, sector_size);
917 fidblk = (uint8_t *) buf->b_data + bpos;
918 udf_fixup_fid_block(fidblk, sector_size,
919 0, len, sectornr);
920 bpos += len;
921 buf_len -= len;
922 }
923 }
924 if (buf->b_udf_c_type == UDF_C_METADATA_SBM) {
925 if (buf->b_lblkno == 0) {
926 /* update the tag location inside */
927 tag = (struct desc_tag *) buf->b_data;
928 tag->tag_loc = udf_rw32(buf->b_blkno);
929 udf_validate_tag_and_crc_sums(buf->b_data);
930 }
931 }
932 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
933
934 /*
935 * Translate new mappings in lmapping to pmappings.
936 * pmapping to contain lb_nums as used for disc adressing.
937 */
938 pmapping = ump->la_pmapping;
939 sectors = (buf->b_bcount + sector_size -1) / sector_size;
940 udf_translate_vtop_list(ump, sectors, vpart_num, lmapping, pmapping);
941
942 /* copy parts into the bufs and set for writing */
943 pmappos = pmapping;
944 buf_len = buf->b_bcount;
945 sectornr = *pmappos++;
946 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
947 eccsect = sectornr - eccline->start_sector;
948 bpos = 0;
949 while (buf_len) {
950 len = MIN(buf_len, sector_size);
951 eccsect = sectornr - eccline->start_sector;
952 if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
953 eccline->flags |= ECC_SEQWRITING;
954 udf_puteccline(eccline);
955 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
956 eccsect = sectornr - eccline->start_sector;
957 }
958 bit = (uint64_t) 1 << eccsect;
959 KASSERT((eccline->readin & bit) == 0);
960 eccline->present |= bit;
961 eccline->dirty |= bit;
962 eccline->bufs[eccsect] = NULL;
963
964 src = (uint8_t *) buf->b_data + bpos;
965 dst = (uint8_t *)
966 eccline->blob + eccsect * sector_size;
967 if (len != sector_size)
968 memset(dst, 0, sector_size);
969 memcpy(dst, src, len);
970
971 /* note that its finished for this extent */
972 nestiobuf_done(buf, len, 0);
973
974 bpos += sector_size;
975 sectornr = *pmappos++;
976 buf_len -= len;
977 }
978 eccline->flags |= ECC_SEQWRITING;
979 udf_puteccline(eccline);
980 mutex_exit(&priv->seqwrite_mutex);
981 }
982
983 /* --------------------------------------------------------------------- */
984
985 static void
986 udf_shedule_read_callback(struct buf *buf)
987 {
988 struct udf_eccline *eccline = BTOE(buf);
989 struct udf_mount *ump = eccline->ump;
990 uint64_t bit;
991 uint8_t *src, *dst;
992 int sector_size = ump->discinfo.sector_size;
993 int error, i, len;
994
995 DPRINTF(ECCLINE, ("read callback called\n"));
996 /* post process read action */
997 error = buf->b_error;
998 for (i = 0; i < ump->packet_size; i++) {
999 bit = (uint64_t) 1 << i;
1000 src = (uint8_t *) buf->b_data + i * sector_size;
1001 dst = (uint8_t *) eccline->blob + i * sector_size;
1002 if (eccline->present & bit)
1003 continue;
1004 if (error) {
1005 eccline->error |= bit;
1006 } else {
1007 eccline->present |= bit;
1008 }
1009 if (eccline->bufs[i]) {
1010 dst = (uint8_t *) eccline->bufs[i]->b_data +
1011 eccline->bufs_bpos[i];
1012 len = eccline->bufs_len[i];
1013 if (!error)
1014 memcpy(dst, src, len);
1015 nestiobuf_done(eccline->bufs[i], len, error);
1016 eccline->bufs[i] = NULL;
1017 }
1018
1019 }
1020 KASSERT(buf->b_data == eccline->blob);
1021 KASSERT(eccline->present == ((uint64_t) 1 << ump->packet_size)-1);
1022
1023 /*
1024 * XXX TODO what to do on read errors? read in all sectors
1025 * synchronously and allocate a sparable entry?
1026 */
1027
1028 wakeup(eccline);
1029 udf_puteccline(eccline);
1030 DPRINTF(ECCLINE, ("read callback finished\n"));
1031 }
1032
1033
1034 static void
1035 udf_shedule_write_callback(struct buf *buf)
1036 {
1037 struct udf_eccline *eccline = BTOE(buf);
1038 struct udf_mount *ump = eccline->ump;
1039 uint64_t bit;
1040 int error, i, len;
1041
1042 DPRINTF(ECCLINE, ("write callback called\n"));
1043 /* post process write action */
1044 error = buf->b_error;
1045 for (i = 0; i < ump->packet_size; i++) {
1046 bit = (uint64_t) 1 << i;
1047 if ((eccline->dirty & bit) == 0)
1048 continue;
1049 if (error) {
1050 eccline->error |= bit;
1051 } else {
1052 eccline->dirty &= ~bit;
1053 }
1054 if (eccline->bufs[i]) {
1055 len = eccline->bufs_len[i];
1056 nestiobuf_done(eccline->bufs[i], len, error);
1057 eccline->bufs[i] = NULL;
1058 }
1059 }
1060 KASSERT(eccline->dirty == 0);
1061
1062 KASSERT(error == 0);
1063 /*
1064 * XXX TODO on write errors allocate a sparable entry
1065 */
1066
1067 wakeup(eccline);
1068 udf_puteccline(eccline);
1069 }
1070
1071
1072 static void
1073 udf_issue_eccline(struct udf_eccline *eccline, int queued_on)
1074 {
1075 struct udf_mount *ump = eccline->ump;
1076 struct strat_private *priv = PRIV(ump);
1077 struct buf *buf, *nestbuf;
1078 uint64_t bit, allbits = ((uint64_t) 1 << ump->packet_size)-1;
1079 uint32_t start;
1080 int sector_size = ump->discinfo.sector_size;
1081 int blks = sector_size / DEV_BSIZE;
1082 int i;
1083
1084 if (queued_on == UDF_SHED_READING) {
1085 DPRINTF(SHEDULE, ("udf_issue_eccline reading : "));
1086 /* read all bits that are not yet present */
1087 eccline->readin = (~eccline->present) & allbits;
1088 KASSERT(eccline->readin);
1089 start = eccline->start_sector;
1090 buf = eccline->buf;
1091 buf_init(buf);
1092 buf->b_flags = B_READ | B_ASYNC;
1093 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1094 buf->b_oflags = 0;
1095 buf->b_iodone = udf_shedule_read_callback;
1096 buf->b_data = eccline->blob;
1097 buf->b_bcount = ump->packet_size * sector_size;
1098 buf->b_resid = buf->b_bcount;
1099 buf->b_bufsize = buf->b_bcount;
1100 buf->b_private = eccline;
1101 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1102 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1103 buf->b_proc = NULL;
1104
1105 if (eccline->present != 0) {
1106 for (i = 0; i < ump->packet_size; i++) {
1107 bit = (uint64_t) 1 << i;
1108 if (eccline->present & bit) {
1109 nestiobuf_done(buf, sector_size, 0);
1110 continue;
1111 }
1112 nestbuf = getiobuf(NULL, true);
1113 nestiobuf_setup(buf, nestbuf, i * sector_size,
1114 sector_size);
1115 /* adjust blocknumber to read */
1116 nestbuf->b_blkno = buf->b_blkno + i*blks;
1117 nestbuf->b_rawblkno = buf->b_rawblkno + i*blks;
1118
1119 DPRINTF(SHEDULE, ("sector %d ",
1120 start + i));
1121 /* call asynchronous */
1122 VOP_STRATEGY(ump->devvp, nestbuf);
1123 }
1124 DPRINTF(SHEDULE, ("\n"));
1125 return;
1126 }
1127 } else {
1128 /* write or seqwrite */
1129 DPRINTF(SHEDULE, ("udf_issue_eccline writing or seqwriting : "));
1130 if (eccline->present != allbits) {
1131 /* requeue to read-only */
1132 DPRINTF(SHEDULE, ("\n\t-> not complete, requeue to "
1133 "reading\n"));
1134 udf_push_eccline(eccline, UDF_SHED_READING);
1135 return;
1136 }
1137 start = eccline->start_sector;
1138 buf = eccline->buf;
1139 buf_init(buf);
1140 buf->b_flags = B_WRITE | B_ASYNC;
1141 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1142 buf->b_oflags = 0;
1143 buf->b_iodone = udf_shedule_write_callback;
1144 buf->b_data = eccline->blob;
1145 buf->b_bcount = ump->packet_size * sector_size;
1146 buf->b_resid = buf->b_bcount;
1147 buf->b_bufsize = buf->b_bcount;
1148 buf->b_private = eccline;
1149 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1150 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1151 buf->b_proc = NULL;
1152 }
1153
1154 mutex_exit(&priv->discstrat_mutex);
1155 /* call asynchronous */
1156 DPRINTF(SHEDULE, ("sector %d for %d\n",
1157 start, ump->packet_size));
1158 VOP_STRATEGY(ump->devvp, buf);
1159 mutex_enter(&priv->discstrat_mutex);
1160 }
1161
1162
1163 static void
1164 udf_discstrat_thread(void *arg)
1165 {
1166 struct udf_mount *ump = (struct udf_mount *) arg;
1167 struct strat_private *priv = PRIV(ump);
1168 struct udf_eccline *eccline;
1169 struct timespec now, *last;
1170 int new_queue, wait, work;
1171
1172 work = 1;
1173 mutex_enter(&priv->discstrat_mutex);
1174 priv->num_floating = 0;
1175 while (priv->run_thread || work || priv->num_floating) {
1176 /* process the current selected queue */
1177 /* maintenance: free exess ecclines */
1178 while (priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE) {
1179 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1180 KASSERT(eccline);
1181 KASSERT(eccline->refcnt == 0);
1182 DPRINTF(ECCLINE, ("Removing entry from free list\n"));
1183 udf_dispose_eccline(eccline);
1184 }
1185
1186 /* get our time */
1187 vfs_timestamp(&now);
1188 last = &priv->last_queued[priv->cur_queue];
1189
1190 /* don't shedule too quickly when there is only one */
1191 if (priv->cur_queue == UDF_SHED_WRITING) {
1192 if (priv->num_queued[priv->cur_queue] <= 2) {
1193 if (now.tv_sec - last->tv_sec < 2) {
1194 /* wait some time */
1195 cv_timedwait(&priv->discstrat_cv,
1196 &priv->discstrat_mutex, hz);
1197 }
1198 }
1199 }
1200
1201 /* get our line */
1202 eccline = udf_pop_eccline(priv, priv->cur_queue);
1203 if (eccline) {
1204 wait = 0;
1205 new_queue = priv->cur_queue;
1206 DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n"));
1207
1208 /* complete the `get' by locking and refcounting it */
1209 UDF_LOCK_ECCLINE(eccline);
1210 eccline->refcnt++;
1211
1212 udf_issue_eccline(eccline, priv->cur_queue);
1213 } else {
1214 wait = 1;
1215 /* check if we can/should switch */
1216 new_queue = priv->cur_queue;
1217 if (BUFQ_PEEK(priv->queues[UDF_SHED_READING]))
1218 new_queue = UDF_SHED_READING;
1219 if (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]))
1220 new_queue = UDF_SHED_WRITING;
1221 if (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]))
1222 new_queue = UDF_SHED_SEQWRITING;
1223
1224 /* dont switch seqwriting too fast */
1225 if (priv->cur_queue == UDF_SHED_READING) {
1226 if (now.tv_sec - last->tv_sec < 1)
1227 new_queue = priv->cur_queue;
1228 }
1229 if (priv->cur_queue == UDF_SHED_WRITING) {
1230 if (now.tv_sec - last->tv_sec < 2)
1231 new_queue = priv->cur_queue;
1232 }
1233 if (priv->cur_queue == UDF_SHED_SEQWRITING) {
1234 if (now.tv_sec - last->tv_sec < 4)
1235 new_queue = priv->cur_queue;
1236 }
1237 }
1238
1239 /* give room */
1240 mutex_exit(&priv->discstrat_mutex);
1241
1242 if (new_queue != priv->cur_queue) {
1243 wait = 0;
1244 DPRINTF(SHEDULE, ("switching from %d to %d\n",
1245 priv->cur_queue, new_queue));
1246 priv->cur_queue = new_queue;
1247 }
1248 mutex_enter(&priv->discstrat_mutex);
1249
1250 /* wait for more if needed */
1251 if (wait)
1252 cv_timedwait(&priv->discstrat_cv,
1253 &priv->discstrat_mutex, hz); /* /8 */
1254
1255 work = (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL);
1256 work |= (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL);
1257 work |= (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL);
1258
1259 DPRINTF(PARANOIA, ("work : (%d, %d, %d) -> work %d, float %d\n",
1260 (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL),
1261 (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL),
1262 (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL),
1263 work, priv->num_floating));
1264 }
1265
1266 mutex_exit(&priv->discstrat_mutex);
1267
1268 /* tear down remaining ecclines */
1269 mutex_enter(&priv->discstrat_mutex);
1270 KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0);
1271 KASSERT(priv->num_queued[UDF_SHED_READING] == 0);
1272 KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0);
1273 KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0);
1274
1275 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_IDLE]) == NULL);
1276 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_READING]) == NULL);
1277 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) == NULL);
1278 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
1279 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1280 while (eccline) {
1281 udf_dispose_eccline(eccline);
1282 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1283 }
1284 KASSERT(priv->num_queued[UDF_SHED_FREE] == 0);
1285 mutex_exit(&priv->discstrat_mutex);
1286
1287 priv->thread_finished = 1;
1288 wakeup(&priv->run_thread);
1289 kthread_exit(0);
1290 /* not reached */
1291 }
1292
1293 /* --------------------------------------------------------------------- */
1294
1295 /*
1296 * Buffer memory pool allocator.
1297 */
1298
1299 static void *
1300 ecclinepool_page_alloc(struct pool *pp, int flags)
1301 {
1302 return (void *)uvm_km_alloc(kernel_map,
1303 MAXBSIZE, MAXBSIZE,
1304 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
1305 | UVM_KMF_WIRED /* UVM_KMF_PAGABLE? */);
1306 }
1307
1308 static void
1309 ecclinepool_page_free(struct pool *pp, void *v)
1310 {
1311 uvm_km_free(kernel_map, (vaddr_t)v, MAXBSIZE, UVM_KMF_WIRED);
1312 }
1313
1314 static struct pool_allocator ecclinepool_allocator = {
1315 .pa_alloc = ecclinepool_page_alloc,
1316 .pa_free = ecclinepool_page_free,
1317 .pa_pagesz = MAXBSIZE,
1318 };
1319
1320
1321 static void
1322 udf_discstrat_init_rmw(struct udf_strat_args *args)
1323 {
1324 struct udf_mount *ump = args->ump;
1325 struct strat_private *priv = PRIV(ump);
1326 uint32_t lb_size, blobsize, hashline;
1327 int i;
1328
1329 KASSERT(ump);
1330 KASSERT(ump->logical_vol);
1331 KASSERT(priv == NULL);
1332
1333 lb_size = udf_rw32(ump->logical_vol->lb_size);
1334 blobsize = ump->packet_size * lb_size;
1335 KASSERT(lb_size > 0);
1336 KASSERT(ump->packet_size <= 64);
1337
1338 /* initialise our memory space */
1339 ump->strategy_private = malloc(sizeof(struct strat_private),
1340 M_UDFTEMP, M_WAITOK);
1341 priv = ump->strategy_private;
1342 memset(priv, 0 , sizeof(struct strat_private));
1343
1344 /* initialise locks */
1345 cv_init(&priv->discstrat_cv, "udfstrat");
1346 mutex_init(&priv->discstrat_mutex, MUTEX_DRIVER, IPL_BIO);
1347 mutex_init(&priv->seqwrite_mutex, MUTEX_DEFAULT, IPL_NONE);
1348
1349 /* initialise struct eccline pool */
1350 pool_init(&priv->eccline_pool, sizeof(struct udf_eccline),
1351 0, 0, 0, "udf_eccline_pool", NULL, IPL_NONE);
1352
1353 /* initialise eccline blob pool */
1354 pool_init(&priv->ecclineblob_pool, blobsize,
1355 0,0,0, "udf_eccline_blob", &ecclinepool_allocator, IPL_NONE);
1356
1357 /* initialise main queues */
1358 for (i = 0; i < UDF_SHED_MAX; i++) {
1359 priv->num_queued[i] = 0;
1360 vfs_timestamp(&priv->last_queued[i]);
1361 }
1362 bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
1363 BUFQ_SORT_RAWBLOCK);
1364 bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
1365 BUFQ_SORT_RAWBLOCK);
1366 bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "disksort", 0);
1367
1368 /* initialise administrative queues */
1369 bufq_alloc(&priv->queues[UDF_SHED_IDLE], "fcfs", 0);
1370 bufq_alloc(&priv->queues[UDF_SHED_FREE], "fcfs", 0);
1371
1372 for (hashline = 0; hashline < UDF_ECCBUF_HASHSIZE; hashline++) {
1373 LIST_INIT(&priv->eccline_hash[hashline]);
1374 }
1375
1376 /* create our disk strategy thread */
1377 priv->cur_queue = UDF_SHED_READING;
1378 priv->thread_finished = 0;
1379 priv->run_thread = 1;
1380 if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
1381 udf_discstrat_thread, ump, &priv->queue_lwp,
1382 "%s", "udf_rw")) {
1383 panic("fork udf_rw");
1384 }
1385 }
1386
1387
1388 static void
1389 udf_discstrat_finish_rmw(struct udf_strat_args *args)
1390 {
1391 struct udf_mount *ump = args->ump;
1392 struct strat_private *priv = PRIV(ump);
1393 int error;
1394
1395 if (ump == NULL)
1396 return;
1397
1398 /* stop our sheduling thread */
1399 KASSERT(priv->run_thread == 1);
1400 priv->run_thread = 0;
1401 wakeup(priv->queue_lwp);
1402 while (!priv->thread_finished) {
1403 error = tsleep(&priv->run_thread, PRIBIO+1,
1404 "udfshedfin", hz);
1405 }
1406 /* kthread should be finished now */
1407
1408 /* cleanup our pools */
1409 pool_destroy(&priv->eccline_pool);
1410 pool_destroy(&priv->ecclineblob_pool);
1411
1412 cv_destroy(&priv->discstrat_cv);
1413 mutex_destroy(&priv->discstrat_mutex);
1414 mutex_destroy(&priv->seqwrite_mutex);
1415
1416 /* free our private space */
1417 free(ump->strategy_private, M_UDFTEMP);
1418 ump->strategy_private = NULL;
1419 }
1420
1421 /* --------------------------------------------------------------------- */
1422
1423 struct udf_strategy udf_strat_rmw =
1424 {
1425 udf_create_nodedscr_rmw,
1426 udf_free_nodedscr_rmw,
1427 udf_read_nodedscr_rmw,
1428 udf_write_nodedscr_rmw,
1429 udf_queuebuf_rmw,
1430 udf_discstrat_init_rmw,
1431 udf_discstrat_finish_rmw
1432 };
1433
1434