/* $NetBSD: udf_strat_rmw.c,v 1.11 2008/11/01 23:51:25 reinoud Exp $ */
2
3 /*
4 * Copyright (c) 2006, 2008 Reinoud Zandijk
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 */
28
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.11 2008/11/01 23:51:25 reinoud Exp $");
32 #endif /* not lint */
33
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_quota.h"
37 #include "opt_compat_netbsd.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysctl.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <miscfs/genfs/genfs_node.h>
48 #include <sys/mount.h>
49 #include <sys/buf.h>
50 #include <sys/file.h>
51 #include <sys/device.h>
52 #include <sys/disklabel.h>
53 #include <sys/ioctl.h>
54 #include <sys/malloc.h>
55 #include <sys/dirent.h>
56 #include <sys/stat.h>
57 #include <sys/conf.h>
58 #include <sys/kauth.h>
59 #include <sys/kthread.h>
60 #include <dev/clock_subr.h>
61
62 #include <fs/udf/ecma167-udf.h>
63 #include <fs/udf/udf_mount.h>
64
65 #include "udf.h"
66 #include "udf_subr.h"
67 #include "udf_bswap.h"
68
69
/*
 * Typed views on the opaque private pointers used throughout this file:
 *   VTOI  a vnode's v_data as a udf_node
 *   PRIV  a mount's strategy_private as this strategy's strat_private
 *   BTOE  a buf's b_private as the udf_eccline it was attached to
 */
#define VTOI(vp)	((struct udf_node *) (vp)->v_data)
#define PRIV(mnt)	((struct strat_private *) (mnt)->strategy_private)
#define BTOE(bp)	((struct udf_eccline *) ((bp)->b_private))
73
74 /* --------------------------------------------------------------------- */
75
/*
 * Maximum number of sectors in one ECC line.  The per-line state in
 * struct udf_eccline is kept in uint64_t bitmaps (one bit per sector) and in
 * arrays of this size.
 */
#define UDF_MAX_PACKET_SIZE	64	/* DONT change this */

/*
 * scheduler states; the value doubles as index in the per-state arrays of
 * struct strat_private.  Zero is not a state: a queued_on value of zero
 * means the line is not on any queue.
 */
#define UDF_SHED_WAITING	1	/* waiting on timeout */
#define UDF_SHED_READING	2
#define UDF_SHED_WRITING	3
#define UDF_SHED_SEQWRITING	4
#define UDF_SHED_IDLE		5	/* resting */
#define UDF_SHED_FREE		6	/* recycleable */
/* parenthesised so the expansion is safe inside larger expressions */
#define UDF_SHED_MAX		(6+1)

/* flags */
#define ECC_LOCKED		0x01	/* prevent access */
#define ECC_WANTED		0x02	/* trying access */
#define ECC_SEQWRITING		0x04	/* sequential queue */
#define ECC_FLOATING		0x08	/* not queued yet */

/* seconds added to the current time when a line is put on the wait queue */
#define ECC_WAITTIME		5
94
95
96 TAILQ_HEAD(ecclineq, udf_eccline);
97 struct udf_eccline {
98 struct udf_mount *ump;
99 uint64_t present; /* preserve these */
100 uint64_t readin; /* bitmap */
101 uint64_t dirty; /* bitmap */
102 uint64_t error; /* bitmap */
103 uint32_t refcnt;
104
105 struct timespec wait_time;
106 uint32_t flags;
107 uint32_t start_sector; /* physical */
108
109 struct buf *buf;
110 void *blob;
111
112 struct buf *bufs[UDF_MAX_PACKET_SIZE];
113 uint32_t bufs_bpos[UDF_MAX_PACKET_SIZE];
114 int bufs_len[UDF_MAX_PACKET_SIZE];
115
116 int queued_on; /* on which BUFQ list */
117 LIST_ENTRY(udf_eccline) hashchain; /* on sector lookup */
118 };
119
120
121 struct strat_private {
122 lwp_t *queue_lwp;
123 kcondvar_t discstrat_cv; /* to wait on */
124 kmutex_t discstrat_mutex; /* disc strategy */
125 kmutex_t seqwrite_mutex; /* protect mappings */
126
127 int run_thread; /* thread control */
128 int thread_finished; /* thread control */
129 int cur_queue;
130
131 int num_floating;
132 int num_queued[UDF_SHED_MAX];
133 struct bufq_state *queues[UDF_SHED_MAX];
134 struct timespec last_queued[UDF_SHED_MAX];
135 struct disk_strategy old_strategy_setting;
136
137 struct pool eccline_pool;
138 struct pool ecclineblob_pool;
139 LIST_HEAD(, udf_eccline) eccline_hash[UDF_ECCBUF_HASHSIZE];
140 };
141
142 /* --------------------------------------------------------------------- */
143
/* wrapper macros around the eccline lock and unlock routines */
#define UDF_LOCK_ECCLINE(line)		udf_lock_eccline(line)
#define UDF_UNLOCK_ECCLINE(line)	udf_unlock_eccline(line)
146
147 /* can be called with or without discstrat lock */
148 static void
149 udf_lock_eccline(struct udf_eccline *eccline)
150 {
151 struct strat_private *priv = PRIV(eccline->ump);
152 int waslocked, ret;
153
154 waslocked = mutex_owned(&priv->discstrat_mutex);
155 if (!waslocked)
156 mutex_enter(&priv->discstrat_mutex);
157
158 /* wait until its unlocked first */
159 while (eccline->flags & ECC_LOCKED) {
160 eccline->flags |= ECC_WANTED;
161 ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex,
162 hz/8);
163 if (ret == EWOULDBLOCK)
164 DPRINTF(LOCKING, ("eccline lock helt, waiting for "
165 "release"));
166 }
167 eccline->flags |= ECC_LOCKED;
168 eccline->flags &= ~ECC_WANTED;
169
170 if (!waslocked)
171 mutex_exit(&priv->discstrat_mutex);
172 }
173
174
175 /* can be called with or without discstrat lock */
176 static void
177 udf_unlock_eccline(struct udf_eccline *eccline)
178 {
179 struct strat_private *priv = PRIV(eccline->ump);
180 int waslocked;
181
182 KASSERT(mutex_owned(&priv->discstrat_mutex));
183
184 waslocked = mutex_owned(&priv->discstrat_mutex);
185 if (!waslocked)
186 mutex_enter(&priv->discstrat_mutex);
187
188 eccline->flags &= ~ECC_LOCKED;
189 cv_broadcast(&priv->discstrat_cv);
190
191 if (!waslocked)
192 mutex_exit(&priv->discstrat_mutex);
193 }
194
195
196 /* NOTE discstrat_mutex should be held! */
197 static void
198 udf_dispose_eccline(struct udf_eccline *eccline)
199 {
200 struct strat_private *priv = PRIV(eccline->ump);
201 struct buf *ret;
202
203 KASSERT(mutex_owned(&priv->discstrat_mutex));
204
205 KASSERT(eccline->refcnt == 0);
206 KASSERT(eccline->dirty == 0);
207
208 DPRINTF(ECCLINE, ("dispose eccline with start sector %d, "
209 "present %0"PRIx64"\n", eccline->start_sector,
210 eccline->present));
211
212 if (eccline->queued_on) {
213 ret = BUFQ_CANCEL(priv->queues[eccline->queued_on], eccline->buf);
214 KASSERT(ret == eccline->buf);
215 priv->num_queued[eccline->queued_on]--;
216 }
217 LIST_REMOVE(eccline, hashchain);
218
219 if (eccline->flags & ECC_FLOATING) {
220 eccline->flags &= ~ECC_FLOATING;
221 priv->num_floating--;
222 }
223
224 putiobuf(eccline->buf);
225 pool_put(&priv->ecclineblob_pool, eccline->blob);
226 pool_put(&priv->eccline_pool, eccline);
227 }
228
229
230 /* NOTE discstrat_mutex should be held! */
231 static void
232 udf_push_eccline(struct udf_eccline *eccline, int newqueue)
233 {
234 struct strat_private *priv = PRIV(eccline->ump);
235 struct buf *ret;
236 int curqueue;
237
238 KASSERT(mutex_owned(&priv->discstrat_mutex));
239
240 DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue));
241
242 /* requeue */
243 curqueue = eccline->queued_on;
244 if (curqueue) {
245 ret = BUFQ_CANCEL(priv->queues[curqueue], eccline->buf);
246
247 DPRINTF(PARANOIA, ("push_eccline BUFQ_CANCEL returned %p when "
248 "requested to remove %p from queue %d\n", ret,
249 eccline->buf, curqueue));
250 #ifdef DIAGNOSTIC
251 if (ret == NULL) {
252 int i;
253
254 printf("udf_push_eccline: bufq_cancel can't find "
255 "buffer; dumping queues\n");
256 for (i = 1; i < UDF_SHED_MAX; i++) {
257 printf("queue %d\n\t", i);
258 ret = BUFQ_GET(priv->queues[i]);
259 while (ret) {
260 printf("%p ", ret);
261 if (ret == eccline->buf)
262 printf("[<-] ");
263 ret = BUFQ_GET(priv->queues[i]);
264 }
265 printf("\n");
266 }
267 panic("fatal queue bug; exit");
268 }
269 #endif
270
271 KASSERT(ret == eccline->buf);
272 priv->num_queued[curqueue]--;
273 }
274
275 /* set buffer block numbers to make sure its queued correctly */
276 eccline->buf->b_lblkno = eccline->start_sector;
277 eccline->buf->b_blkno = eccline->start_sector;
278 eccline->buf->b_rawblkno = eccline->start_sector;
279
280 BUFQ_PUT(priv->queues[newqueue], eccline->buf);
281 eccline->queued_on = newqueue;
282 priv->num_queued[newqueue]++;
283 vfs_timestamp(&priv->last_queued[newqueue]);
284
285 if (eccline->flags & ECC_FLOATING) {
286 eccline->flags &= ~ECC_FLOATING;
287 priv->num_floating--;
288 }
289
290 if ((newqueue != UDF_SHED_FREE) && (newqueue != UDF_SHED_IDLE))
291 cv_signal(&priv->discstrat_cv);
292 }
293
294
295 static struct udf_eccline *
296 udf_pop_eccline(struct strat_private *priv, int queued_on)
297 {
298 struct udf_eccline *eccline;
299 struct buf *buf;
300
301 KASSERT(mutex_owned(&priv->discstrat_mutex));
302
303 buf = BUFQ_GET(priv->queues[queued_on]);
304 if (!buf) {
305 KASSERT(priv->num_queued[queued_on] == 0);
306 return NULL;
307 }
308
309 eccline = BTOE(buf);
310 KASSERT(eccline->queued_on == queued_on);
311 eccline->queued_on = 0;
312 priv->num_queued[queued_on]--;
313
314 if (eccline->flags & ECC_FLOATING)
315 panic("popping already marked floating eccline");
316 eccline->flags |= ECC_FLOATING;
317 priv->num_floating++;
318
319 DPRINTF(PARANOIA, ("DEBUG: buf %p popped from queue %d\n",
320 eccline->buf, queued_on));
321
322 return eccline;
323 }
324
325
326 static struct udf_eccline *
327 udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags)
328 {
329 struct strat_private *priv = PRIV(ump);
330 struct udf_eccline *eccline;
331 uint32_t start_sector, lb_size, blobsize;
332 uint8_t *eccline_blob;
333 int line, line_offset;
334 int num_busy, ret;
335
336 line_offset = sector % ump->packet_size;
337 start_sector = sector - line_offset;
338 line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK;
339
340 mutex_enter(&priv->discstrat_mutex);
341
342 retry:
343 DPRINTF(ECCLINE, ("get line sector %d, line %d\n", sector, line));
344 LIST_FOREACH(eccline, &priv->eccline_hash[line], hashchain) {
345 if (eccline->start_sector == start_sector) {
346 DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n",
347 eccline->start_sector));
348
349 UDF_LOCK_ECCLINE(eccline);
350 /* move from freelist (!) */
351 if (eccline->queued_on == UDF_SHED_FREE) {
352 DPRINTF(ECCLINE, ("was on freelist\n"));
353 KASSERT(eccline->refcnt == 0);
354 udf_push_eccline(eccline, UDF_SHED_IDLE);
355 }
356 eccline->refcnt++;
357 mutex_exit(&priv->discstrat_mutex);
358 return eccline;
359 }
360 }
361
362 DPRINTF(ECCLINE, ("\tnot found in eccline cache\n"));
363 /* not found in eccline cache */
364
365 lb_size = udf_rw32(ump->logical_vol->lb_size);
366 blobsize = ump->packet_size * lb_size;
367
368 /* dont allow too many pending requests */
369 DPRINTF(ECCLINE, ("\tallocating new eccline\n"));
370 num_busy = (priv->num_queued[UDF_SHED_SEQWRITING] + priv->num_floating);
371 if ((flags & ECC_SEQWRITING) && (num_busy > UDF_ECCLINE_MAXBUSY)) {
372 ret = cv_timedwait(&priv->discstrat_cv,
373 &priv->discstrat_mutex, hz/8);
374 goto retry;
375 }
376
377 eccline_blob = pool_get(&priv->ecclineblob_pool, PR_NOWAIT);
378 eccline = pool_get(&priv->eccline_pool, PR_NOWAIT);
379 if ((eccline_blob == NULL) || (eccline == NULL)) {
380 if (eccline_blob)
381 pool_put(&priv->ecclineblob_pool, eccline_blob);
382 if (eccline)
383 pool_put(&priv->eccline_pool, eccline);
384
385 /* out of memory for now; canibalise freelist */
386 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
387 if (eccline == NULL) {
388 /* serious trouble; wait and retry */
389 cv_timedwait(&priv->discstrat_cv,
390 &priv->discstrat_mutex, hz/8);
391 goto retry;
392 }
393 /* push back line if we're waiting for it */
394 if (eccline->flags & ECC_WANTED) {
395 udf_push_eccline(eccline, UDF_SHED_IDLE);
396 goto retry;
397 }
398
399 /* unlink this entry */
400 LIST_REMOVE(eccline, hashchain);
401
402 KASSERT(eccline->flags & ECC_FLOATING);
403
404 eccline_blob = eccline->blob;
405 memset(eccline, 0, sizeof(struct udf_eccline));
406 eccline->flags = ECC_FLOATING;
407 } else {
408 memset(eccline, 0, sizeof(struct udf_eccline));
409 eccline->flags = ECC_FLOATING;
410 priv->num_floating++;
411 }
412
413 eccline->queued_on = 0;
414 eccline->blob = eccline_blob;
415 eccline->buf = getiobuf(NULL, true);
416 eccline->buf->b_private = eccline; /* IMPORTANT */
417
418 /* initialise eccline blob */
419 memset(eccline->blob, 0, blobsize);
420
421 eccline->ump = ump;
422 eccline->present = eccline->readin = eccline->dirty = 0;
423 eccline->error = 0;
424 eccline->refcnt = 0;
425
426 eccline->start_sector = start_sector;
427 eccline->buf->b_lblkno = start_sector;
428 eccline->buf->b_blkno = start_sector;
429 eccline->buf->b_rawblkno = start_sector;
430
431 LIST_INSERT_HEAD(&priv->eccline_hash[line], eccline, hashchain);
432
433 /*
434 * TODO possible optimalisation for checking overlap with partitions
435 * to get a clue on future eccline usage
436 */
437 eccline->refcnt++;
438 UDF_LOCK_ECCLINE(eccline);
439
440 mutex_exit(&priv->discstrat_mutex);
441
442 return eccline;
443 }
444
445
446 static void
447 udf_puteccline(struct udf_eccline *eccline)
448 {
449 struct strat_private *priv = PRIV(eccline->ump);
450 struct udf_mount *ump = eccline->ump;
451 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
452
453 mutex_enter(&priv->discstrat_mutex);
454
455 /* clear directly all readin requests from present ones */
456 if (eccline->readin & eccline->present) {
457 /* clear all read bits that are already read in */
458 eccline->readin &= (~eccline->present) & allbits;
459 wakeup(eccline);
460 }
461
462 DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n",
463 eccline->start_sector, eccline->refcnt));
464
465 /* if we have active nodes we dont set it on seqwriting */
466 if (eccline->refcnt > 1)
467 eccline->flags &= ~ECC_SEQWRITING;
468
469 vfs_timestamp(&eccline->wait_time);
470 eccline->wait_time.tv_sec += ECC_WAITTIME;
471 udf_push_eccline(eccline, UDF_SHED_WAITING);
472
473 KASSERT(eccline->refcnt >= 1);
474 eccline->refcnt--;
475 UDF_UNLOCK_ECCLINE(eccline);
476
477 mutex_exit(&priv->discstrat_mutex);
478 }
479
480 /* --------------------------------------------------------------------- */
481
482 static int
483 udf_create_nodedscr_rmw(struct udf_strat_args *args)
484 {
485 union dscrptr **dscrptr = &args->dscr;
486 struct udf_mount *ump = args->ump;
487 struct long_ad *icb = args->icb;
488 struct udf_eccline *eccline;
489 uint64_t bit;
490 uint32_t sectornr, lb_size, dummy;
491 uint8_t *mem;
492 int error, eccsect;
493
494 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
495 if (error)
496 return error;
497
498 lb_size = udf_rw32(ump->logical_vol->lb_size);
499
500 /* get our eccline */
501 eccline = udf_geteccline(ump, sectornr, 0);
502 eccsect = sectornr - eccline->start_sector;
503
504 bit = (uint64_t) 1 << eccsect;
505 eccline->readin &= ~bit; /* just in case */
506 eccline->present |= bit;
507 eccline->dirty &= ~bit; /* Err... euhm... clean? */
508
509 eccline->refcnt++;
510
511 /* clear space */
512 mem = ((uint8_t *) eccline->blob) + eccsect * lb_size;
513 memset(mem, 0, lb_size);
514
515 udf_puteccline(eccline);
516
517 *dscrptr = (union dscrptr *) mem;
518 return 0;
519 }
520
521
522 static void
523 udf_free_nodedscr_rmw(struct udf_strat_args *args)
524 {
525 struct udf_mount *ump = args->ump;
526 struct long_ad *icb = args->icb;
527 struct udf_eccline *eccline;
528 uint64_t bit;
529 uint32_t sectornr, dummy;
530 int error, eccsect;
531
532 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
533 if (error)
534 return;
535
536 /* get our eccline */
537 eccline = udf_geteccline(ump, sectornr, 0);
538 eccsect = sectornr - eccline->start_sector;
539
540 bit = (uint64_t) 1 << eccsect;
541 eccline->readin &= ~bit; /* just in case */
542
543 KASSERT(eccline->refcnt >= 1);
544 eccline->refcnt--;
545
546 udf_puteccline(eccline);
547 }
548
549
550 static int
551 udf_read_nodedscr_rmw(struct udf_strat_args *args)
552 {
553 union dscrptr **dscrptr = &args->dscr;
554 struct udf_mount *ump = args->ump;
555 struct long_ad *icb = args->icb;
556 struct udf_eccline *eccline;
557 uint64_t bit;
558 uint32_t sectornr, dummy;
559 uint8_t *pos;
560 int sector_size = ump->discinfo.sector_size;
561 int lb_size = udf_rw32(ump->logical_vol->lb_size);
562 int i, error, dscrlen, eccsect;
563
564 lb_size = lb_size;
565 KASSERT(sector_size == lb_size);
566 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
567 if (error)
568 return error;
569
570 /* get our eccline */
571 eccline = udf_geteccline(ump, sectornr, 0);
572 eccsect = sectornr - eccline->start_sector;
573
574 bit = (uint64_t) 1 << eccsect;
575 if ((eccline->present & bit) == 0) {
576 /* mark bit for readin */
577 eccline->readin |= bit;
578 eccline->refcnt++; /* prevent recycling */
579 KASSERT(eccline->bufs[eccsect] == NULL);
580 udf_puteccline(eccline);
581
582 /* wait for completion; XXX remodel to lock bit code */
583 error = 0;
584 while ((eccline->present & bit) == 0) {
585 tsleep(eccline, PRIBIO+1, "udflvdrd", hz/8);
586 if (eccline->error & bit) {
587 KASSERT(eccline->refcnt >= 1);
588 eccline->refcnt--; /* undo temp refcnt */
589 *dscrptr = NULL;
590 return EIO; /* XXX error code */
591 }
592 }
593
594 /* reget our line */
595 eccline = udf_geteccline(ump, sectornr, 0);
596 KASSERT(eccline->refcnt >= 1);
597 eccline->refcnt--; /* undo refcnt */
598 }
599
600 *dscrptr = (union dscrptr *)
601 (((uint8_t *) eccline->blob) + eccsect * sector_size);
602
603 /* code from read_phys_descr */
604 /* check if its a valid tag */
605 error = udf_check_tag(*dscrptr);
606 if (error) {
607 /* check if its an empty block */
608 pos = (uint8_t *) *dscrptr;
609 for (i = 0; i < sector_size; i++, pos++) {
610 if (*pos) break;
611 }
612 if (i == sector_size) {
613 /* return no error but with no dscrptr */
614 error = 0;
615 }
616 *dscrptr = NULL;
617 udf_puteccline(eccline);
618 return error;
619 }
620
621 /* calculate descriptor size */
622 dscrlen = udf_tagsize(*dscrptr, sector_size);
623 error = udf_check_tag_payload(*dscrptr, dscrlen);
624 if (error) {
625 *dscrptr = NULL;
626 udf_puteccline(eccline);
627 return error;
628 }
629
630 eccline->refcnt++;
631 udf_puteccline(eccline);
632
633 return 0;
634 }
635
636
637 static int
638 udf_write_nodedscr_rmw(struct udf_strat_args *args)
639 {
640 union dscrptr *dscrptr = args->dscr;
641 struct udf_mount *ump = args->ump;
642 struct long_ad *icb = args->icb;
643 struct udf_node *udf_node = args->udf_node;
644 struct udf_eccline *eccline;
645 uint64_t bit;
646 uint32_t sectornr, logsectornr, dummy;
647 // int waitfor = args->waitfor;
648 int sector_size = ump->discinfo.sector_size;
649 int lb_size = udf_rw32(ump->logical_vol->lb_size);
650 int error, eccsect;
651
652 lb_size = lb_size;
653 KASSERT(sector_size == lb_size);
654 sectornr = 0;
655 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
656 if (error)
657 return error;
658
659 /* add reference to the vnode to prevent recycling */
660 vhold(udf_node->vnode);
661
662 /* get our eccline */
663 eccline = udf_geteccline(ump, sectornr, 0);
664 eccsect = sectornr - eccline->start_sector;
665
666 bit = (uint64_t) 1 << eccsect;
667
668 /* old callback still pending? */
669 if (eccline->bufs[eccsect]) {
670 DPRINTF(WRITE, ("udf_write_nodedscr_rmw: writing descriptor"
671 " over buffer?\n"));
672 nestiobuf_done(eccline->bufs[eccsect],
673 eccline->bufs_len[eccsect],
674 0);
675 eccline->bufs[eccsect] = NULL;
676 }
677
678 /* set sector number in the descriptor and validate */
679 dscrptr = (union dscrptr *)
680 (((uint8_t *) eccline->blob) + eccsect * sector_size);
681 KASSERT(dscrptr == args->dscr);
682
683 logsectornr = udf_rw32(icb->loc.lb_num);
684 dscrptr->tag.tag_loc = udf_rw32(logsectornr);
685 udf_validate_tag_and_crc_sums(dscrptr);
686
687 udf_fixup_node_internals(ump, (uint8_t *) dscrptr, UDF_C_NODE);
688
689 /* set our flags */
690 KASSERT(eccline->present & bit);
691 eccline->dirty |= bit;
692
693 KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size);
694
695 udf_puteccline(eccline);
696
697 holdrele(udf_node->vnode);
698 udf_node->outstanding_nodedscr--;
699 if (udf_node->outstanding_nodedscr == 0) {
700 UDF_UNLOCK_NODE(udf_node, udf_node->i_flags & IN_CALLBACK_ULK);
701 wakeup(&udf_node->outstanding_nodedscr);
702 }
703
704 /* XXX waitfor not used */
705 return 0;
706 }
707
708
709 static void
710 udf_queuebuf_rmw(struct udf_strat_args *args)
711 {
712 struct udf_mount *ump = args->ump;
713 struct buf *buf = args->nestbuf;
714 struct desc_tag *tag;
715 struct strat_private *priv = PRIV(ump);
716 struct udf_eccline *eccline;
717 struct long_ad *node_ad_cpy;
718 uint64_t bit, *lmapping, *pmapping, *lmappos, *pmappos, blknr;
719 uint32_t buf_len, len, sectors, sectornr, our_sectornr;
720 uint32_t bpos;
721 uint16_t vpart_num;
722 uint8_t *fidblk, *src, *dst;
723 int sector_size = ump->discinfo.sector_size;
724 int blks = sector_size / DEV_BSIZE;
725 int eccsect, what, queue, error;
726
727 KASSERT(ump);
728 KASSERT(buf);
729 KASSERT(buf->b_iodone == nestiobuf_iodone);
730
731 blknr = buf->b_blkno;
732 our_sectornr = blknr / blks;
733
734 what = buf->b_udf_c_type;
735 queue = UDF_SHED_READING;
736 if ((buf->b_flags & B_READ) == 0) {
737 /* writing */
738 queue = UDF_SHED_SEQWRITING;
739 if (what == UDF_C_DSCR)
740 queue = UDF_SHED_WRITING;
741 if (what == UDF_C_NODE)
742 queue = UDF_SHED_WRITING;
743 }
744
745 if (queue == UDF_SHED_READING) {
746 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw READ %p : sector %d type %d,"
747 "b_resid %d, b_bcount %d, b_bufsize %d\n",
748 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
749 buf->b_resid, buf->b_bcount, buf->b_bufsize));
750
751 /* mark bits for reading */
752 buf_len = buf->b_bcount;
753 sectornr = our_sectornr;
754 eccline = udf_geteccline(ump, sectornr, 0);
755 eccsect = sectornr - eccline->start_sector;
756 bpos = 0;
757 while (buf_len) {
758 len = MIN(buf_len, sector_size);
759 if (eccsect == ump->packet_size) {
760 udf_puteccline(eccline);
761 eccline = udf_geteccline(ump, sectornr, 0);
762 eccsect = sectornr - eccline->start_sector;
763 }
764 bit = (uint64_t) 1 << eccsect;
765 error = eccline->error & bit ? EIO : 0;
766 if (eccline->present & bit) {
767 src = (uint8_t *) eccline->blob +
768 eccsect * sector_size;
769 dst = (uint8_t *) buf->b_data + bpos;
770 if (!error)
771 memcpy(dst, src, len);
772 nestiobuf_done(buf, len, error);
773 } else {
774 eccline->readin |= bit;
775 KASSERT(eccline->bufs[eccsect] == NULL);
776 eccline->bufs[eccsect] = buf;
777 eccline->bufs_bpos[eccsect] = bpos;
778 eccline->bufs_len[eccsect] = len;
779 }
780 bpos += sector_size;
781 eccsect++;
782 sectornr++;
783 buf_len -= len;
784 }
785 udf_puteccline(eccline);
786 return;
787 }
788
789 if (queue == UDF_SHED_WRITING) {
790 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw WRITE %p : sector %d "
791 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
792 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
793 buf->b_resid, buf->b_bcount, buf->b_bufsize));
794 /* if we have FIDs fixup using buffer's sector number(s) */
795 if (buf->b_udf_c_type == UDF_C_FIDS) {
796 panic("UDF_C_FIDS in SHED_WRITING!\n");
797 #if 0
798 buf_len = buf->b_bcount;
799 sectornr = our_sectornr;
800 bpos = 0;
801 while (buf_len) {
802 len = MIN(buf_len, sector_size);
803 fidblk = (uint8_t *) buf->b_data + bpos;
804 udf_fixup_fid_block(fidblk, sector_size,
805 0, len, sectornr);
806 sectornr++;
807 bpos += len;
808 buf_len -= len;
809 }
810 #endif
811 }
812 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
813
814 /* copy parts into the bufs and set for writing */
815 buf_len = buf->b_bcount;
816 sectornr = our_sectornr;
817 eccline = udf_geteccline(ump, sectornr, 0);
818 eccsect = sectornr - eccline->start_sector;
819 bpos = 0;
820 while (buf_len) {
821 len = MIN(buf_len, sector_size);
822 if (eccsect == ump->packet_size) {
823 udf_puteccline(eccline);
824 eccline = udf_geteccline(ump, sectornr, 0);
825 eccsect = sectornr - eccline->start_sector;
826 }
827 bit = (uint64_t) 1 << eccsect;
828 KASSERT((eccline->readin & bit) == 0);
829 eccline->present |= bit;
830 eccline->dirty |= bit;
831 if (eccline->bufs[eccsect]) {
832 /* old callback still pending */
833 nestiobuf_done(eccline->bufs[eccsect],
834 eccline->bufs_len[eccsect],
835 0);
836 eccline->bufs[eccsect] = NULL;
837 }
838
839 src = (uint8_t *) buf->b_data + bpos;
840 dst = (uint8_t *) eccline->blob + eccsect * sector_size;
841 if (len != sector_size)
842 memset(dst, 0, sector_size);
843 memcpy(dst, src, len);
844
845 /* note that its finished for this extent */
846 eccline->bufs[eccsect] = NULL;
847 nestiobuf_done(buf, len, 0);
848
849 bpos += sector_size;
850 eccsect++;
851 sectornr++;
852 buf_len -= len;
853 }
854 udf_puteccline(eccline);
855 return;
856
857 }
858
859 /* sequential writing */
860 KASSERT(queue == UDF_SHED_SEQWRITING);
861 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw SEQWRITE %p : sector XXXX "
862 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
863 buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
864 buf->b_bufsize));
865 /*
866 * Buffers should not have been allocated to disc addresses yet on
867 * this queue. Note that a buffer can get multiple extents allocated.
868 * Note that it *looks* like the normal writing but its different in
869 * the details.
870 *
871 * lmapping contains lb_num relative to base partition.
872 *
873 * XXX should we try to claim/organize the allocated memory to
874 * block-aligned pieces?
875 */
876 mutex_enter(&priv->seqwrite_mutex);
877
878 lmapping = ump->la_lmapping;
879 node_ad_cpy = ump->la_node_ad_cpy;
880
881 /* logically allocate buf and map it in the file */
882 udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
883
884 /* if we have FIDs, fixup using the new allocation table */
885 if (buf->b_udf_c_type == UDF_C_FIDS) {
886 buf_len = buf->b_bcount;
887 bpos = 0;
888 lmappos = lmapping;
889 while (buf_len) {
890 sectornr = *lmappos++;
891 len = MIN(buf_len, sector_size);
892 fidblk = (uint8_t *) buf->b_data + bpos;
893 udf_fixup_fid_block(fidblk, sector_size,
894 0, len, sectornr);
895 bpos += len;
896 buf_len -= len;
897 }
898 }
899 if (buf->b_udf_c_type == UDF_C_METADATA_SBM) {
900 if (buf->b_lblkno == 0) {
901 /* update the tag location inside */
902 tag = (struct desc_tag *) buf->b_data;
903 tag->tag_loc = udf_rw32(*lmapping);
904 udf_validate_tag_and_crc_sums(buf->b_data);
905 }
906 }
907 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
908
909 /*
910 * Translate new mappings in lmapping to pmappings.
911 * pmapping to contain lb_nums as used for disc adressing.
912 */
913 pmapping = ump->la_pmapping;
914 sectors = (buf->b_bcount + sector_size -1) / sector_size;
915 udf_translate_vtop_list(ump, sectors, vpart_num, lmapping, pmapping);
916
917 /* copy parts into the bufs and set for writing */
918 pmappos = pmapping;
919 buf_len = buf->b_bcount;
920 sectornr = *pmappos++;
921 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
922 eccsect = sectornr - eccline->start_sector;
923 bpos = 0;
924 while (buf_len) {
925 len = MIN(buf_len, sector_size);
926 eccsect = sectornr - eccline->start_sector;
927 if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
928 eccline->flags |= ECC_SEQWRITING;
929 udf_puteccline(eccline);
930 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
931 eccsect = sectornr - eccline->start_sector;
932 }
933 bit = (uint64_t) 1 << eccsect;
934 KASSERT((eccline->readin & bit) == 0);
935 eccline->present |= bit;
936 eccline->dirty |= bit;
937 eccline->bufs[eccsect] = NULL;
938
939 src = (uint8_t *) buf->b_data + bpos;
940 dst = (uint8_t *)
941 eccline->blob + eccsect * sector_size;
942 if (len != sector_size)
943 memset(dst, 0, sector_size);
944 memcpy(dst, src, len);
945
946 /* note that its finished for this extent */
947 nestiobuf_done(buf, len, 0);
948
949 bpos += sector_size;
950 sectornr = *pmappos++;
951 buf_len -= len;
952 }
953 eccline->flags |= ECC_SEQWRITING;
954 udf_puteccline(eccline);
955 mutex_exit(&priv->seqwrite_mutex);
956 }
957
958 /* --------------------------------------------------------------------- */
959
960 static void
961 udf_shedule_read_callback(struct buf *buf)
962 {
963 struct udf_eccline *eccline = BTOE(buf);
964 struct udf_mount *ump = eccline->ump;
965 uint64_t bit;
966 uint8_t *src, *dst;
967 int sector_size = ump->discinfo.sector_size;
968 int error, i, len;
969
970 DPRINTF(ECCLINE, ("read callback called\n"));
971 /* post process read action */
972 error = buf->b_error;
973 for (i = 0; i < ump->packet_size; i++) {
974 bit = (uint64_t) 1 << i;
975 src = (uint8_t *) buf->b_data + i * sector_size;
976 dst = (uint8_t *) eccline->blob + i * sector_size;
977 if (eccline->present & bit)
978 continue;
979 eccline->present |= bit;
980 if (error)
981 eccline->error |= bit;
982 if (eccline->bufs[i]) {
983 dst = (uint8_t *) eccline->bufs[i]->b_data +
984 eccline->bufs_bpos[i];
985 len = eccline->bufs_len[i];
986 if (!error)
987 memcpy(dst, src, len);
988 nestiobuf_done(eccline->bufs[i], len, error);
989 eccline->bufs[i] = NULL;
990 }
991
992 }
993 KASSERT(buf->b_data == eccline->blob);
994 KASSERT(eccline->present == ((uint64_t) 1 << ump->packet_size)-1);
995
996 /*
997 * XXX TODO what to do on read errors? read in all sectors
998 * synchronously and allocate a sparable entry?
999 */
1000
1001 wakeup(eccline);
1002 udf_puteccline(eccline);
1003 DPRINTF(ECCLINE, ("read callback finished\n"));
1004 }
1005
1006
1007 static void
1008 udf_shedule_write_callback(struct buf *buf)
1009 {
1010 struct udf_eccline *eccline = BTOE(buf);
1011 struct udf_mount *ump = eccline->ump;
1012 uint64_t bit;
1013 int error, i, len;
1014
1015 DPRINTF(ECCLINE, ("write callback called\n"));
1016 /* post process write action */
1017 error = buf->b_error;
1018 for (i = 0; i < ump->packet_size; i++) {
1019 bit = (uint64_t) 1 << i;
1020 if ((eccline->dirty & bit) == 0)
1021 continue;
1022 if (error) {
1023 eccline->error |= bit;
1024 } else {
1025 eccline->dirty &= ~bit;
1026 }
1027 if (eccline->bufs[i]) {
1028 len = eccline->bufs_len[i];
1029 nestiobuf_done(eccline->bufs[i], len, error);
1030 eccline->bufs[i] = NULL;
1031 }
1032 }
1033 KASSERT(eccline->dirty == 0);
1034
1035 KASSERT(error == 0);
1036 /*
1037 * XXX TODO on write errors allocate a sparable entry
1038 */
1039
1040 wakeup(eccline);
1041 udf_puteccline(eccline);
1042 }
1043
1044
1045 static void
1046 udf_issue_eccline(struct udf_eccline *eccline, int queued_on)
1047 {
1048 struct udf_mount *ump = eccline->ump;
1049 struct strat_private *priv = PRIV(ump);
1050 struct buf *buf, *nestbuf;
1051 uint64_t bit, allbits = ((uint64_t) 1 << ump->packet_size)-1;
1052 uint32_t start;
1053 int sector_size = ump->discinfo.sector_size;
1054 int blks = sector_size / DEV_BSIZE;
1055 int i;
1056
1057 if (queued_on == UDF_SHED_READING) {
1058 DPRINTF(SHEDULE, ("udf_issue_eccline reading : "));
1059 /* read all bits that are not yet present */
1060 eccline->readin = (~eccline->present) & allbits;
1061 KASSERT(eccline->readin);
1062 start = eccline->start_sector;
1063 buf = eccline->buf;
1064 buf_init(buf);
1065 buf->b_flags = B_READ | B_ASYNC;
1066 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1067 buf->b_oflags = 0;
1068 buf->b_iodone = udf_shedule_read_callback;
1069 buf->b_data = eccline->blob;
1070 buf->b_bcount = ump->packet_size * sector_size;
1071 buf->b_resid = buf->b_bcount;
1072 buf->b_bufsize = buf->b_bcount;
1073 buf->b_private = eccline;
1074 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1075 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1076 buf->b_proc = NULL;
1077
1078 if (eccline->present != 0) {
1079 for (i = 0; i < ump->packet_size; i++) {
1080 bit = (uint64_t) 1 << i;
1081 if (eccline->present & bit) {
1082 nestiobuf_done(buf, sector_size, 0);
1083 continue;
1084 }
1085 nestbuf = getiobuf(NULL, true);
1086 nestiobuf_setup(buf, nestbuf, i * sector_size,
1087 sector_size);
1088 /* adjust blocknumber to read */
1089 nestbuf->b_blkno = buf->b_blkno + i*blks;
1090 nestbuf->b_rawblkno = buf->b_rawblkno + i*blks;
1091
1092 DPRINTF(SHEDULE, ("sector %d ",
1093 start + i));
1094 /* call asynchronous */
1095 VOP_STRATEGY(ump->devvp, nestbuf);
1096 }
1097 DPRINTF(SHEDULE, ("\n"));
1098 return;
1099 }
1100 } else {
1101 /* write or seqwrite */
1102 DPRINTF(SHEDULE, ("udf_issue_eccline writing or seqwriting : "));
1103 if (eccline->present != allbits) {
1104 /* requeue to read-only */
1105 DPRINTF(SHEDULE, ("\n\t-> not complete, requeue to "
1106 "reading\n"));
1107 udf_push_eccline(eccline, UDF_SHED_READING);
1108 return;
1109 }
1110 start = eccline->start_sector;
1111 buf = eccline->buf;
1112 buf_init(buf);
1113 buf->b_flags = B_WRITE | B_ASYNC;
1114 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1115 buf->b_oflags = 0;
1116 buf->b_iodone = udf_shedule_write_callback;
1117 buf->b_data = eccline->blob;
1118 buf->b_bcount = ump->packet_size * sector_size;
1119 buf->b_resid = buf->b_bcount;
1120 buf->b_bufsize = buf->b_bcount;
1121 buf->b_private = eccline;
1122 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1123 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1124 buf->b_proc = NULL;
1125 }
1126
1127 mutex_exit(&priv->discstrat_mutex);
1128 /* call asynchronous */
1129 DPRINTF(SHEDULE, ("sector %d for %d\n",
1130 start, ump->packet_size));
1131 VOP_STRATEGY(ump->devvp, buf);
1132 mutex_enter(&priv->discstrat_mutex);
1133 }
1134
1135
1136 static void
1137 udf_discstrat_thread(void *arg)
1138 {
1139 struct udf_mount *ump = (struct udf_mount *) arg;
1140 struct strat_private *priv = PRIV(ump);
1141 struct udf_eccline *eccline;
1142 struct timespec now, *last;
1143 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
1144 int new_queue, wait, work, num, cnt;
1145
1146 work = 1;
1147 mutex_enter(&priv->discstrat_mutex);
1148 priv->num_floating = 0;
1149 while (priv->run_thread || work || priv->num_floating) {
1150 /* get our time */
1151 vfs_timestamp(&now);
1152
1153 /* maintenance: handle eccline state machine */
1154 num = priv->num_queued[UDF_SHED_WAITING];
1155 cnt = 0;
1156 while (cnt < num) {
1157 eccline = udf_pop_eccline(priv, UDF_SHED_WAITING);
1158 /* requeue */
1159 new_queue = UDF_SHED_FREE;
1160 if (eccline->refcnt > 0)
1161 new_queue = UDF_SHED_IDLE;
1162 if (eccline->flags & ECC_WANTED)
1163 new_queue = UDF_SHED_IDLE;
1164 if (eccline->readin)
1165 new_queue = UDF_SHED_READING;
1166 if (eccline->dirty) {
1167 new_queue = UDF_SHED_WAITING;
1168 if (eccline->wait_time.tv_sec - now.tv_sec <= 0) {
1169 new_queue = UDF_SHED_WRITING;
1170 if (eccline->flags & ECC_SEQWRITING)
1171 new_queue = UDF_SHED_SEQWRITING;
1172 if (eccline->present != allbits)
1173 new_queue = UDF_SHED_READING;
1174 }
1175 }
1176 udf_push_eccline(eccline, new_queue);
1177 cnt++;
1178 }
1179
1180 /* maintenance: free exess ecclines */
1181 while (priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE) {
1182 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1183 KASSERT(eccline);
1184 KASSERT(eccline->refcnt == 0);
1185 if (eccline->flags & ECC_WANTED) {
1186 udf_push_eccline(eccline, UDF_SHED_IDLE);
1187 DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n"));
1188 } else {
1189 DPRINTF(ECCLINE, ("Removing entry from free list\n"));
1190 udf_dispose_eccline(eccline);
1191 }
1192 }
1193
1194 /* process the current selected queue */
1195 /* get our time */
1196 vfs_timestamp(&now);
1197 last = &priv->last_queued[priv->cur_queue];
1198
1199 /* don't shedule too quickly when there is only one */
1200 if (priv->cur_queue == UDF_SHED_WRITING) {
1201 if (priv->num_queued[priv->cur_queue] <= 2) {
1202 if (now.tv_sec - last->tv_sec < 4) {
1203 /* wait some time */
1204 cv_timedwait(&priv->discstrat_cv,
1205 &priv->discstrat_mutex, hz);
1206 continue;
1207 }
1208 }
1209 }
1210
1211 /* get our line */
1212 eccline = udf_pop_eccline(priv, priv->cur_queue);
1213 if (eccline) {
1214 wait = 0;
1215 new_queue = priv->cur_queue;
1216 DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n"));
1217
1218 /* complete the `get' by locking and refcounting it */
1219 UDF_LOCK_ECCLINE(eccline);
1220 eccline->refcnt++;
1221
1222 udf_issue_eccline(eccline, priv->cur_queue);
1223 } else {
1224 wait = 1;
1225 /* check if we can/should switch */
1226 new_queue = priv->cur_queue;
1227 if (BUFQ_PEEK(priv->queues[UDF_SHED_READING]))
1228 new_queue = UDF_SHED_READING;
1229 if (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]))
1230 new_queue = UDF_SHED_WRITING;
1231 if (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]))
1232 new_queue = UDF_SHED_SEQWRITING;
1233
1234 /* dont switch seqwriting too fast */
1235 if (priv->cur_queue == UDF_SHED_READING) {
1236 if (now.tv_sec - last->tv_sec < 1)
1237 new_queue = priv->cur_queue;
1238 }
1239 if (priv->cur_queue == UDF_SHED_WRITING) {
1240 if (now.tv_sec - last->tv_sec < 2)
1241 new_queue = priv->cur_queue;
1242 }
1243 if (priv->cur_queue == UDF_SHED_SEQWRITING) {
1244 if (now.tv_sec - last->tv_sec < 4)
1245 new_queue = priv->cur_queue;
1246 }
1247 }
1248
1249 /* give room */
1250 mutex_exit(&priv->discstrat_mutex);
1251
1252 if (new_queue != priv->cur_queue) {
1253 wait = 0;
1254 DPRINTF(SHEDULE, ("switching from %d to %d\n",
1255 priv->cur_queue, new_queue));
1256 priv->cur_queue = new_queue;
1257 }
1258 mutex_enter(&priv->discstrat_mutex);
1259
1260 /* wait for more if needed */
1261 if (wait)
1262 cv_timedwait(&priv->discstrat_cv,
1263 &priv->discstrat_mutex, hz); /* /8 */
1264
1265 work = (BUFQ_PEEK(priv->queues[UDF_SHED_WAITING]) != NULL);
1266 work |= (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL);
1267 work |= (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL);
1268 work |= (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL);
1269
1270 DPRINTF(PARANOIA, ("work : (%d, %d, %d) -> work %d, float %d\n",
1271 (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL),
1272 (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL),
1273 (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL),
1274 work, priv->num_floating));
1275 }
1276
1277 mutex_exit(&priv->discstrat_mutex);
1278
1279 /* tear down remaining ecclines */
1280 mutex_enter(&priv->discstrat_mutex);
1281 KASSERT(priv->num_queued[UDF_SHED_WAITING] == 0);
1282 KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0);
1283 KASSERT(priv->num_queued[UDF_SHED_READING] == 0);
1284 KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0);
1285 KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0);
1286
1287 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_WAITING]) == NULL);
1288 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_IDLE]) == NULL);
1289 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_READING]) == NULL);
1290 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) == NULL);
1291 KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
1292 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1293 while (eccline) {
1294 udf_dispose_eccline(eccline);
1295 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1296 }
1297 KASSERT(priv->num_queued[UDF_SHED_FREE] == 0);
1298 mutex_exit(&priv->discstrat_mutex);
1299
1300 priv->thread_finished = 1;
1301 wakeup(&priv->run_thread);
1302 kthread_exit(0);
1303 /* not reached */
1304 }
1305
1306 /* --------------------------------------------------------------------- */
1307
1308 /*
1309 * Buffer memory pool allocator.
1310 */
1311
1312 static void *
1313 ecclinepool_page_alloc(struct pool *pp, int flags)
1314 {
1315 return (void *)uvm_km_alloc(kernel_map,
1316 MAXBSIZE, MAXBSIZE,
1317 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
1318 | UVM_KMF_WIRED /* UVM_KMF_PAGABLE? */);
1319 }
1320
1321 static void
1322 ecclinepool_page_free(struct pool *pp, void *v)
1323 {
1324 uvm_km_free(kernel_map, (vaddr_t)v, MAXBSIZE, UVM_KMF_WIRED);
1325 }
1326
1327 static struct pool_allocator ecclinepool_allocator = {
1328 .pa_alloc = ecclinepool_page_alloc,
1329 .pa_free = ecclinepool_page_free,
1330 .pa_pagesz = MAXBSIZE,
1331 };
1332
1333
1334 static void
1335 udf_discstrat_init_rmw(struct udf_strat_args *args)
1336 {
1337 struct udf_mount *ump = args->ump;
1338 struct strat_private *priv = PRIV(ump);
1339 uint32_t lb_size, blobsize, hashline;
1340 int i;
1341
1342 KASSERT(ump);
1343 KASSERT(ump->logical_vol);
1344 KASSERT(priv == NULL);
1345
1346 lb_size = udf_rw32(ump->logical_vol->lb_size);
1347 blobsize = ump->packet_size * lb_size;
1348 KASSERT(lb_size > 0);
1349 KASSERT(ump->packet_size <= 64);
1350
1351 /* initialise our memory space */
1352 ump->strategy_private = malloc(sizeof(struct strat_private),
1353 M_UDFTEMP, M_WAITOK);
1354 priv = ump->strategy_private;
1355 memset(priv, 0 , sizeof(struct strat_private));
1356
1357 /* initialise locks */
1358 cv_init(&priv->discstrat_cv, "udfstrat");
1359 mutex_init(&priv->discstrat_mutex, MUTEX_DRIVER, IPL_BIO);
1360 mutex_init(&priv->seqwrite_mutex, MUTEX_DEFAULT, IPL_NONE);
1361
1362 /* initialise struct eccline pool */
1363 pool_init(&priv->eccline_pool, sizeof(struct udf_eccline),
1364 0, 0, 0, "udf_eccline_pool", NULL, IPL_NONE);
1365
1366 /* initialise eccline blob pool */
1367 pool_init(&priv->ecclineblob_pool, blobsize,
1368 0,0,0, "udf_eccline_blob", &ecclinepool_allocator, IPL_NONE);
1369
1370 /* initialise main queues */
1371 for (i = 0; i < UDF_SHED_MAX; i++) {
1372 priv->num_queued[i] = 0;
1373 vfs_timestamp(&priv->last_queued[i]);
1374 }
1375 bufq_alloc(&priv->queues[UDF_SHED_WAITING], "fcfs",
1376 BUFQ_SORT_RAWBLOCK);
1377 bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
1378 BUFQ_SORT_RAWBLOCK);
1379 bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
1380 BUFQ_SORT_RAWBLOCK);
1381 bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "disksort", 0);
1382
1383 /* initialise administrative queues */
1384 bufq_alloc(&priv->queues[UDF_SHED_IDLE], "fcfs", 0);
1385 bufq_alloc(&priv->queues[UDF_SHED_FREE], "fcfs", 0);
1386
1387 for (hashline = 0; hashline < UDF_ECCBUF_HASHSIZE; hashline++) {
1388 LIST_INIT(&priv->eccline_hash[hashline]);
1389 }
1390
1391 /* create our disk strategy thread */
1392 priv->cur_queue = UDF_SHED_READING;
1393 priv->thread_finished = 0;
1394 priv->run_thread = 1;
1395 if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
1396 udf_discstrat_thread, ump, &priv->queue_lwp,
1397 "%s", "udf_rw")) {
1398 panic("fork udf_rw");
1399 }
1400 }
1401
1402
1403 static void
1404 udf_discstrat_finish_rmw(struct udf_strat_args *args)
1405 {
1406 struct udf_mount *ump = args->ump;
1407 struct strat_private *priv = PRIV(ump);
1408 int error;
1409
1410 if (ump == NULL)
1411 return;
1412
1413 /* stop our sheduling thread */
1414 KASSERT(priv->run_thread == 1);
1415 priv->run_thread = 0;
1416 wakeup(priv->queue_lwp);
1417 while (!priv->thread_finished) {
1418 error = tsleep(&priv->run_thread, PRIBIO+1,
1419 "udfshedfin", hz);
1420 }
1421 /* kthread should be finished now */
1422
1423 /* cleanup our pools */
1424 pool_destroy(&priv->eccline_pool);
1425 pool_destroy(&priv->ecclineblob_pool);
1426
1427 cv_destroy(&priv->discstrat_cv);
1428 mutex_destroy(&priv->discstrat_mutex);
1429 mutex_destroy(&priv->seqwrite_mutex);
1430
1431 /* free our private space */
1432 free(ump->strategy_private, M_UDFTEMP);
1433 ump->strategy_private = NULL;
1434 }
1435
1436 /* --------------------------------------------------------------------- */
1437
/*
 * Operations table of the read-modify-write strategy.  The initialiser is
 * positional, so the entries must stay in the member order of
 * struct udf_strategy: the four node descriptor operations, the buffer
 * queueing entry and finally the strategy init and finish hooks.
 */
struct udf_strategy udf_strat_rmw =
{
	udf_create_nodedscr_rmw,
	udf_free_nodedscr_rmw,
	udf_read_nodedscr_rmw,
	udf_write_nodedscr_rmw,
	udf_queuebuf_rmw,
	udf_discstrat_init_rmw,		/* sets up private state and thread */
	udf_discstrat_finish_rmw	/* stops thread, frees private state */
};
1448
1449