udf_strat_rmw.c revision 1.17 1 /* $NetBSD: udf_strat_rmw.c,v 1.17 2009/01/13 13:35:54 yamt Exp $ */
2
3 /*
4 * Copyright (c) 2006, 2008 Reinoud Zandijk
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 */
28
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.17 2009/01/13 13:35:54 yamt Exp $");
32 #endif /* not lint */
33
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_compat_netbsd.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysctl.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 #include <sys/vnode.h>
46 #include <miscfs/genfs/genfs_node.h>
47 #include <sys/mount.h>
48 #include <sys/buf.h>
49 #include <sys/file.h>
50 #include <sys/device.h>
51 #include <sys/disklabel.h>
52 #include <sys/ioctl.h>
53 #include <sys/malloc.h>
54 #include <sys/dirent.h>
55 #include <sys/stat.h>
56 #include <sys/conf.h>
57 #include <sys/kauth.h>
58 #include <sys/kthread.h>
59 #include <dev/clock_subr.h>
60
61 #include <fs/udf/ecma167-udf.h>
62 #include <fs/udf/udf_mount.h>
63
64 #include "udf.h"
65 #include "udf_subr.h"
66 #include "udf_bswap.h"
67
68
/*
 * Convenience casts from generic kernel objects to the UDF private data
 * hanging off them.
 */
/* vnode -> UDF node stored in v_data */
#define VTOI(vnode)	((struct udf_node *) (vnode)->v_data)
/* UDF mount -> this strategy's private state */
#define PRIV(ump)	((struct strat_private *) (ump)->strategy_private)
/* buf -> eccline stored in b_private */
#define BTOE(buf)	((struct udf_eccline *) ((buf)->b_private))
72
73 /* --------------------------------------------------------------------- */
74
/*
 * Maximum number of sectors in one packet (ECC line).  The per-sector state
 * of an eccline is kept in 64-bit bitmaps and in arrays sized by this value.
 */
#define UDF_MAX_PACKET_SIZE	64	/* DONT change this */

/*
 * Scheduler states; each one doubles as the index of a buffer queue.
 * Index 0 is never used as a queue: `queued_on == 0' means "not queued".
 */
#define UDF_SHED_WAITING	1	/* waiting on timeout */
#define UDF_SHED_READING	2
#define UDF_SHED_WRITING	3
#define UDF_SHED_SEQWRITING	4
#define UDF_SHED_IDLE		5	/* resting */
#define UDF_SHED_FREE		6	/* recycleable */
/* number of slots needed for arrays indexed by scheduler state */
#define UDF_SHED_MAX		(6+1)	/* parenthesized: safe in expressions */

/* eccline flags */
#define ECC_LOCKED		0x01	/* prevent access */
#define ECC_WANTED		0x02	/* trying access */
#define ECC_SEQWRITING		0x04	/* sequential queue */
#define ECC_FLOATING		0x08	/* not queued yet */

/* added to the tv_sec of an eccline's wait_time when it is put back */
#define ECC_WAITTIME		10
93
94
95 TAILQ_HEAD(ecclineq, udf_eccline);
96 struct udf_eccline {
97 struct udf_mount *ump;
98 uint64_t present; /* preserve these */
99 uint64_t readin; /* bitmap */
100 uint64_t dirty; /* bitmap */
101 uint64_t error; /* bitmap */
102 uint32_t refcnt;
103
104 struct timespec wait_time;
105 uint32_t flags;
106 uint32_t start_sector; /* physical */
107
108 struct buf *buf;
109 void *blob;
110
111 struct buf *bufs[UDF_MAX_PACKET_SIZE];
112 uint32_t bufs_bpos[UDF_MAX_PACKET_SIZE];
113 int bufs_len[UDF_MAX_PACKET_SIZE];
114
115 int queued_on; /* on which BUFQ list */
116 LIST_ENTRY(udf_eccline) hashchain; /* on sector lookup */
117 };
118
119
120 struct strat_private {
121 lwp_t *queue_lwp;
122 kcondvar_t discstrat_cv; /* to wait on */
123 kmutex_t discstrat_mutex; /* disc strategy */
124 kmutex_t seqwrite_mutex; /* protect mappings */
125
126 int thread_running; /* thread control */
127 int run_thread; /* thread control */
128 int thread_finished; /* thread control */
129 int cur_queue;
130
131 int num_floating;
132 int num_queued[UDF_SHED_MAX];
133 struct bufq_state *queues[UDF_SHED_MAX];
134 struct timespec last_queued[UDF_SHED_MAX];
135 struct disk_strategy old_strategy_setting;
136
137 struct pool eccline_pool;
138 struct pool ecclineblob_pool;
139 LIST_HEAD(, udf_eccline) eccline_hash[UDF_ECCBUF_HASHSIZE];
140 };
141
142 /* --------------------------------------------------------------------- */
143
/* Thin wrappers around the eccline lock/unlock functions. */
#define UDF_LOCK_ECCLINE(eccline)	udf_lock_eccline(eccline)
#define UDF_UNLOCK_ECCLINE(eccline)	udf_unlock_eccline(eccline)
146
147 /* can be called with or without discstrat lock */
148 static void
149 udf_lock_eccline(struct udf_eccline *eccline)
150 {
151 struct strat_private *priv = PRIV(eccline->ump);
152 int waslocked, ret;
153
154 waslocked = mutex_owned(&priv->discstrat_mutex);
155 if (!waslocked)
156 mutex_enter(&priv->discstrat_mutex);
157
158 /* wait until its unlocked first */
159 while (eccline->flags & ECC_LOCKED) {
160 eccline->flags |= ECC_WANTED;
161 ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex,
162 hz/8);
163 if (ret == EWOULDBLOCK)
164 DPRINTF(LOCKING, ("eccline lock helt, waiting for "
165 "release"));
166 }
167 eccline->flags |= ECC_LOCKED;
168 eccline->flags &= ~ECC_WANTED;
169
170 if (!waslocked)
171 mutex_exit(&priv->discstrat_mutex);
172 }
173
174
175 /* can be called with or without discstrat lock */
176 static void
177 udf_unlock_eccline(struct udf_eccline *eccline)
178 {
179 struct strat_private *priv = PRIV(eccline->ump);
180 int waslocked;
181
182 waslocked = mutex_owned(&priv->discstrat_mutex);
183 if (!waslocked)
184 mutex_enter(&priv->discstrat_mutex);
185
186 eccline->flags &= ~ECC_LOCKED;
187 cv_broadcast(&priv->discstrat_cv);
188
189 if (!waslocked)
190 mutex_exit(&priv->discstrat_mutex);
191 }
192
193
194 /* NOTE discstrat_mutex should be held! */
195 static void
196 udf_dispose_eccline(struct udf_eccline *eccline)
197 {
198 struct strat_private *priv = PRIV(eccline->ump);
199 struct buf *ret;
200
201 KASSERT(mutex_owned(&priv->discstrat_mutex));
202
203 KASSERT(eccline->refcnt == 0);
204 KASSERT(eccline->dirty == 0);
205
206 DPRINTF(ECCLINE, ("dispose eccline with start sector %d, "
207 "present %0"PRIx64"\n", eccline->start_sector,
208 eccline->present));
209
210 if (eccline->queued_on) {
211 ret = bufq_cancel(priv->queues[eccline->queued_on], eccline->buf);
212 KASSERT(ret == eccline->buf);
213 priv->num_queued[eccline->queued_on]--;
214 }
215 LIST_REMOVE(eccline, hashchain);
216
217 if (eccline->flags & ECC_FLOATING) {
218 eccline->flags &= ~ECC_FLOATING;
219 priv->num_floating--;
220 }
221
222 putiobuf(eccline->buf);
223 pool_put(&priv->ecclineblob_pool, eccline->blob);
224 pool_put(&priv->eccline_pool, eccline);
225 }
226
227
228 /* NOTE discstrat_mutex should be held! */
229 static void
230 udf_push_eccline(struct udf_eccline *eccline, int newqueue)
231 {
232 struct strat_private *priv = PRIV(eccline->ump);
233 struct buf *ret;
234 int curqueue;
235
236 KASSERT(mutex_owned(&priv->discstrat_mutex));
237
238 DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue));
239
240 /* requeue */
241 curqueue = eccline->queued_on;
242 if (curqueue) {
243 ret = bufq_cancel(priv->queues[curqueue], eccline->buf);
244
245 DPRINTF(PARANOIA, ("push_eccline bufq_cancel returned %p when "
246 "requested to remove %p from queue %d\n", ret,
247 eccline->buf, curqueue));
248 #ifdef DIAGNOSTIC
249 if (ret == NULL) {
250 int i;
251
252 printf("udf_push_eccline: bufq_cancel can't find "
253 "buffer; dumping queues\n");
254 for (i = 1; i < UDF_SHED_MAX; i++) {
255 printf("queue %d\n\t", i);
256 ret = bufq_get(priv->queues[i]);
257 while (ret) {
258 printf("%p ", ret);
259 if (ret == eccline->buf)
260 printf("[<-] ");
261 ret = bufq_get(priv->queues[i]);
262 }
263 printf("\n");
264 }
265 panic("fatal queue bug; exit");
266 }
267 #endif
268
269 KASSERT(ret == eccline->buf);
270 priv->num_queued[curqueue]--;
271 }
272
273 /* set buffer block numbers to make sure its queued correctly */
274 eccline->buf->b_lblkno = eccline->start_sector;
275 eccline->buf->b_blkno = eccline->start_sector;
276 eccline->buf->b_rawblkno = eccline->start_sector;
277
278 bufq_put(priv->queues[newqueue], eccline->buf);
279 eccline->queued_on = newqueue;
280 priv->num_queued[newqueue]++;
281 vfs_timestamp(&priv->last_queued[newqueue]);
282
283 if (eccline->flags & ECC_FLOATING) {
284 eccline->flags &= ~ECC_FLOATING;
285 priv->num_floating--;
286 }
287
288 /* tickle disc strategy statemachine */
289 if (newqueue != UDF_SHED_IDLE)
290 cv_signal(&priv->discstrat_cv);
291 }
292
293
294 static struct udf_eccline *
295 udf_pop_eccline(struct strat_private *priv, int queued_on)
296 {
297 struct udf_eccline *eccline;
298 struct buf *buf;
299
300 KASSERT(mutex_owned(&priv->discstrat_mutex));
301
302 buf = bufq_get(priv->queues[queued_on]);
303 if (!buf) {
304 KASSERT(priv->num_queued[queued_on] == 0);
305 return NULL;
306 }
307
308 eccline = BTOE(buf);
309 KASSERT(eccline->queued_on == queued_on);
310 eccline->queued_on = 0;
311 priv->num_queued[queued_on]--;
312
313 if (eccline->flags & ECC_FLOATING)
314 panic("popping already marked floating eccline");
315 eccline->flags |= ECC_FLOATING;
316 priv->num_floating++;
317
318 DPRINTF(PARANOIA, ("DEBUG: buf %p popped from queue %d\n",
319 eccline->buf, queued_on));
320
321 return eccline;
322 }
323
324
325 static struct udf_eccline *
326 udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags)
327 {
328 struct strat_private *priv = PRIV(ump);
329 struct udf_eccline *eccline;
330 uint32_t start_sector, lb_size, blobsize;
331 uint8_t *eccline_blob;
332 int line, line_offset;
333 int num_busy, ret;
334
335 line_offset = sector % ump->packet_size;
336 start_sector = sector - line_offset;
337 line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK;
338
339 mutex_enter(&priv->discstrat_mutex);
340 KASSERT(priv->thread_running);
341
342 retry:
343 DPRINTF(ECCLINE, ("get line sector %d, line %d\n", sector, line));
344 LIST_FOREACH(eccline, &priv->eccline_hash[line], hashchain) {
345 if (eccline->start_sector == start_sector) {
346 DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n",
347 eccline->start_sector));
348
349 UDF_LOCK_ECCLINE(eccline);
350 /* move from freelist (!) */
351 if (eccline->queued_on == UDF_SHED_FREE) {
352 DPRINTF(ECCLINE, ("was on freelist\n"));
353 KASSERT(eccline->refcnt == 0);
354 udf_push_eccline(eccline, UDF_SHED_IDLE);
355 }
356 eccline->refcnt++;
357 mutex_exit(&priv->discstrat_mutex);
358 return eccline;
359 }
360 }
361
362 DPRINTF(ECCLINE, ("\tnot found in eccline cache\n"));
363 /* not found in eccline cache */
364
365 lb_size = udf_rw32(ump->logical_vol->lb_size);
366 blobsize = ump->packet_size * lb_size;
367
368 /* dont allow too many pending requests */
369 DPRINTF(ECCLINE, ("\tallocating new eccline\n"));
370 num_busy = (priv->num_queued[UDF_SHED_SEQWRITING] + priv->num_floating);
371 if ((flags & ECC_SEQWRITING) && (num_busy > UDF_ECCLINE_MAXBUSY)) {
372 ret = cv_timedwait(&priv->discstrat_cv,
373 &priv->discstrat_mutex, hz/8);
374 goto retry;
375 }
376
377 eccline_blob = pool_get(&priv->ecclineblob_pool, PR_NOWAIT);
378 eccline = pool_get(&priv->eccline_pool, PR_NOWAIT);
379 if ((eccline_blob == NULL) || (eccline == NULL)) {
380 if (eccline_blob)
381 pool_put(&priv->ecclineblob_pool, eccline_blob);
382 if (eccline)
383 pool_put(&priv->eccline_pool, eccline);
384
385 /* out of memory for now; canibalise freelist */
386 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
387 if (eccline == NULL) {
388 /* serious trouble; wait and retry */
389 cv_timedwait(&priv->discstrat_cv,
390 &priv->discstrat_mutex, hz/8);
391 goto retry;
392 }
393 /* push back line if we're waiting for it */
394 if (eccline->flags & ECC_WANTED) {
395 udf_push_eccline(eccline, UDF_SHED_IDLE);
396 goto retry;
397 }
398
399 /* unlink this entry */
400 LIST_REMOVE(eccline, hashchain);
401
402 KASSERT(eccline->flags & ECC_FLOATING);
403
404 eccline_blob = eccline->blob;
405 memset(eccline, 0, sizeof(struct udf_eccline));
406 eccline->flags = ECC_FLOATING;
407 } else {
408 memset(eccline, 0, sizeof(struct udf_eccline));
409 eccline->flags = ECC_FLOATING;
410 priv->num_floating++;
411 }
412
413 eccline->queued_on = 0;
414 eccline->blob = eccline_blob;
415 eccline->buf = getiobuf(NULL, true);
416 eccline->buf->b_private = eccline; /* IMPORTANT */
417
418 /* initialise eccline blob */
419 memset(eccline->blob, 0, blobsize);
420
421 eccline->ump = ump;
422 eccline->present = eccline->readin = eccline->dirty = 0;
423 eccline->error = 0;
424 eccline->refcnt = 0;
425
426 eccline->start_sector = start_sector;
427 eccline->buf->b_lblkno = start_sector;
428 eccline->buf->b_blkno = start_sector;
429 eccline->buf->b_rawblkno = start_sector;
430
431 LIST_INSERT_HEAD(&priv->eccline_hash[line], eccline, hashchain);
432
433 /*
434 * TODO possible optimalisation for checking overlap with partitions
435 * to get a clue on future eccline usage
436 */
437 eccline->refcnt++;
438 UDF_LOCK_ECCLINE(eccline);
439
440 mutex_exit(&priv->discstrat_mutex);
441
442 return eccline;
443 }
444
445
446 static void
447 udf_puteccline(struct udf_eccline *eccline)
448 {
449 struct strat_private *priv = PRIV(eccline->ump);
450 struct udf_mount *ump = eccline->ump;
451 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
452
453 mutex_enter(&priv->discstrat_mutex);
454
455 /* clear directly all readin requests from present ones */
456 if (eccline->readin & eccline->present) {
457 /* clear all read bits that are already read in */
458 eccline->readin &= (~eccline->present) & allbits;
459 wakeup(eccline);
460 }
461
462 DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n",
463 eccline->start_sector, eccline->refcnt));
464
465 /* if we have active nodes we dont set it on seqwriting */
466 if (eccline->refcnt > 1)
467 eccline->flags &= ~ECC_SEQWRITING;
468
469 vfs_timestamp(&eccline->wait_time);
470 eccline->wait_time.tv_sec += ECC_WAITTIME;
471 udf_push_eccline(eccline, UDF_SHED_WAITING);
472
473 KASSERT(eccline->refcnt >= 1);
474 eccline->refcnt--;
475 UDF_UNLOCK_ECCLINE(eccline);
476
477 wakeup(eccline);
478 mutex_exit(&priv->discstrat_mutex);
479 }
480
481 /* --------------------------------------------------------------------- */
482
483 static int
484 udf_create_nodedscr_rmw(struct udf_strat_args *args)
485 {
486 union dscrptr **dscrptr = &args->dscr;
487 struct udf_mount *ump = args->ump;
488 struct long_ad *icb = args->icb;
489 struct udf_eccline *eccline;
490 uint64_t bit;
491 uint32_t sectornr, lb_size, dummy;
492 uint8_t *mem;
493 int error, eccsect;
494
495 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
496 if (error)
497 return error;
498
499 lb_size = udf_rw32(ump->logical_vol->lb_size);
500
501 /* get our eccline */
502 eccline = udf_geteccline(ump, sectornr, 0);
503 eccsect = sectornr - eccline->start_sector;
504
505 bit = (uint64_t) 1 << eccsect;
506 eccline->readin &= ~bit; /* just in case */
507 eccline->present |= bit;
508 eccline->dirty &= ~bit; /* Err... euhm... clean? */
509
510 eccline->refcnt++;
511
512 /* clear space */
513 mem = ((uint8_t *) eccline->blob) + eccsect * lb_size;
514 memset(mem, 0, lb_size);
515
516 udf_puteccline(eccline);
517
518 *dscrptr = (union dscrptr *) mem;
519 return 0;
520 }
521
522
523 static void
524 udf_free_nodedscr_rmw(struct udf_strat_args *args)
525 {
526 struct udf_mount *ump = args->ump;
527 struct long_ad *icb = args->icb;
528 struct udf_eccline *eccline;
529 uint64_t bit;
530 uint32_t sectornr, dummy;
531 int error, eccsect;
532
533 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
534 if (error)
535 return;
536
537 /* get our eccline */
538 eccline = udf_geteccline(ump, sectornr, 0);
539 eccsect = sectornr - eccline->start_sector;
540
541 bit = (uint64_t) 1 << eccsect;
542 eccline->readin &= ~bit; /* just in case */
543
544 KASSERT(eccline->refcnt >= 1);
545 eccline->refcnt--;
546
547 udf_puteccline(eccline);
548 }
549
550
551 static int
552 udf_read_nodedscr_rmw(struct udf_strat_args *args)
553 {
554 union dscrptr **dscrptr = &args->dscr;
555 struct udf_mount *ump = args->ump;
556 struct long_ad *icb = args->icb;
557 struct udf_eccline *eccline;
558 uint64_t bit;
559 uint32_t sectornr, dummy;
560 uint8_t *pos;
561 int sector_size = ump->discinfo.sector_size;
562 int lb_size = udf_rw32(ump->logical_vol->lb_size);
563 int i, error, dscrlen, eccsect;
564
565 lb_size = lb_size;
566 KASSERT(sector_size == lb_size);
567 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
568 if (error)
569 return error;
570
571 /* get our eccline */
572 eccline = udf_geteccline(ump, sectornr, 0);
573 eccsect = sectornr - eccline->start_sector;
574
575 bit = (uint64_t) 1 << eccsect;
576 if ((eccline->present & bit) == 0) {
577 /* mark bit for readin */
578 eccline->readin |= bit;
579 eccline->refcnt++; /* prevent recycling */
580 KASSERT(eccline->bufs[eccsect] == NULL);
581 udf_puteccline(eccline);
582
583 /* wait for completion; XXX remodel to lock bit code */
584 error = 0;
585 while ((eccline->present & bit) == 0) {
586 tsleep(eccline, PRIBIO+1, "udflvdrd", hz/8);
587 if (eccline->error & bit) {
588 KASSERT(eccline->refcnt >= 1);
589 eccline->refcnt--; /* undo temp refcnt */
590 *dscrptr = NULL;
591 return EIO; /* XXX error code */
592 }
593 }
594
595 /* reget our line */
596 eccline = udf_geteccline(ump, sectornr, 0);
597 KASSERT(eccline->refcnt >= 1);
598 eccline->refcnt--; /* undo refcnt */
599 }
600
601 *dscrptr = (union dscrptr *)
602 (((uint8_t *) eccline->blob) + eccsect * sector_size);
603
604 /* code from read_phys_descr */
605 /* check if its a valid tag */
606 error = udf_check_tag(*dscrptr);
607 if (error) {
608 /* check if its an empty block */
609 pos = (uint8_t *) *dscrptr;
610 for (i = 0; i < sector_size; i++, pos++) {
611 if (*pos) break;
612 }
613 if (i == sector_size) {
614 /* return no error but with no dscrptr */
615 error = 0;
616 }
617 *dscrptr = NULL;
618 udf_puteccline(eccline);
619 return error;
620 }
621
622 /* calculate descriptor size */
623 dscrlen = udf_tagsize(*dscrptr, sector_size);
624 error = udf_check_tag_payload(*dscrptr, dscrlen);
625 if (error) {
626 *dscrptr = NULL;
627 udf_puteccline(eccline);
628 return error;
629 }
630
631 eccline->refcnt++;
632 udf_puteccline(eccline);
633
634 return 0;
635 }
636
637
638 static int
639 udf_write_nodedscr_rmw(struct udf_strat_args *args)
640 {
641 union dscrptr *dscrptr = args->dscr;
642 struct udf_mount *ump = args->ump;
643 struct long_ad *icb = args->icb;
644 struct udf_node *udf_node = args->udf_node;
645 struct udf_eccline *eccline;
646 uint64_t bit;
647 uint32_t sectornr, logsectornr, dummy;
648 // int waitfor = args->waitfor;
649 int sector_size = ump->discinfo.sector_size;
650 int lb_size = udf_rw32(ump->logical_vol->lb_size);
651 int error, eccsect;
652
653 lb_size = lb_size;
654 KASSERT(sector_size == lb_size);
655 sectornr = 0;
656 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
657 if (error)
658 return error;
659
660 /* add reference to the vnode to prevent recycling */
661 vhold(udf_node->vnode);
662
663 /* get our eccline */
664 eccline = udf_geteccline(ump, sectornr, 0);
665 eccsect = sectornr - eccline->start_sector;
666
667 bit = (uint64_t) 1 << eccsect;
668
669 /* old callback still pending? */
670 if (eccline->bufs[eccsect]) {
671 DPRINTF(WRITE, ("udf_write_nodedscr_rmw: writing descriptor"
672 " over buffer?\n"));
673 nestiobuf_done(eccline->bufs[eccsect],
674 eccline->bufs_len[eccsect],
675 0);
676 eccline->bufs[eccsect] = NULL;
677 }
678
679 /* set sector number in the descriptor and validate */
680 dscrptr = (union dscrptr *)
681 (((uint8_t *) eccline->blob) + eccsect * sector_size);
682 KASSERT(dscrptr == args->dscr);
683
684 logsectornr = udf_rw32(icb->loc.lb_num);
685 dscrptr->tag.tag_loc = udf_rw32(logsectornr);
686 udf_validate_tag_and_crc_sums(dscrptr);
687
688 udf_fixup_node_internals(ump, (uint8_t *) dscrptr, UDF_C_NODE);
689
690 /* set our flags */
691 KASSERT(eccline->present & bit);
692 eccline->dirty |= bit;
693
694 KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size);
695
696 udf_puteccline(eccline);
697
698 holdrele(udf_node->vnode);
699 udf_node->outstanding_nodedscr--;
700 if (udf_node->outstanding_nodedscr == 0) {
701 UDF_UNLOCK_NODE(udf_node, udf_node->i_flags & IN_CALLBACK_ULK);
702 wakeup(&udf_node->outstanding_nodedscr);
703 }
704
705 /* XXX waitfor not used */
706 return 0;
707 }
708
709
710 static void
711 udf_queuebuf_rmw(struct udf_strat_args *args)
712 {
713 struct udf_mount *ump = args->ump;
714 struct buf *buf = args->nestbuf;
715 struct desc_tag *tag;
716 struct strat_private *priv = PRIV(ump);
717 struct udf_eccline *eccline;
718 struct long_ad *node_ad_cpy;
719 uint64_t bit, *lmapping, *pmapping, *lmappos, *pmappos, blknr;
720 uint32_t buf_len, len, sectors, sectornr, our_sectornr;
721 uint32_t bpos;
722 uint16_t vpart_num;
723 uint8_t *fidblk, *src, *dst;
724 int sector_size = ump->discinfo.sector_size;
725 int blks = sector_size / DEV_BSIZE;
726 int eccsect, what, queue, error;
727
728 KASSERT(ump);
729 KASSERT(buf);
730 KASSERT(buf->b_iodone == nestiobuf_iodone);
731
732 blknr = buf->b_blkno;
733 our_sectornr = blknr / blks;
734
735 what = buf->b_udf_c_type;
736 queue = UDF_SHED_READING;
737 if ((buf->b_flags & B_READ) == 0) {
738 /* writing */
739 queue = UDF_SHED_SEQWRITING;
740 if (what == UDF_C_DSCR)
741 queue = UDF_SHED_WRITING;
742 if (what == UDF_C_NODE)
743 queue = UDF_SHED_WRITING;
744 }
745
746 if (queue == UDF_SHED_READING) {
747 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw READ %p : sector %d type %d,"
748 "b_resid %d, b_bcount %d, b_bufsize %d\n",
749 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
750 buf->b_resid, buf->b_bcount, buf->b_bufsize));
751
752 /* mark bits for reading */
753 buf_len = buf->b_bcount;
754 sectornr = our_sectornr;
755 eccline = udf_geteccline(ump, sectornr, 0);
756 eccsect = sectornr - eccline->start_sector;
757 bpos = 0;
758 while (buf_len) {
759 len = MIN(buf_len, sector_size);
760 if (eccsect == ump->packet_size) {
761 udf_puteccline(eccline);
762 eccline = udf_geteccline(ump, sectornr, 0);
763 eccsect = sectornr - eccline->start_sector;
764 }
765 bit = (uint64_t) 1 << eccsect;
766 error = eccline->error & bit ? EIO : 0;
767 if (eccline->present & bit) {
768 src = (uint8_t *) eccline->blob +
769 eccsect * sector_size;
770 dst = (uint8_t *) buf->b_data + bpos;
771 if (!error)
772 memcpy(dst, src, len);
773 nestiobuf_done(buf, len, error);
774 } else {
775 eccline->readin |= bit;
776 KASSERT(eccline->bufs[eccsect] == NULL);
777 eccline->bufs[eccsect] = buf;
778 eccline->bufs_bpos[eccsect] = bpos;
779 eccline->bufs_len[eccsect] = len;
780 }
781 bpos += sector_size;
782 eccsect++;
783 sectornr++;
784 buf_len -= len;
785 }
786 udf_puteccline(eccline);
787 return;
788 }
789
790 if (queue == UDF_SHED_WRITING) {
791 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw WRITE %p : sector %d "
792 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
793 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
794 buf->b_resid, buf->b_bcount, buf->b_bufsize));
795 /* if we have FIDs fixup using buffer's sector number(s) */
796 if (buf->b_udf_c_type == UDF_C_FIDS) {
797 panic("UDF_C_FIDS in SHED_WRITING!\n");
798 #if 0
799 buf_len = buf->b_bcount;
800 sectornr = our_sectornr;
801 bpos = 0;
802 while (buf_len) {
803 len = MIN(buf_len, sector_size);
804 fidblk = (uint8_t *) buf->b_data + bpos;
805 udf_fixup_fid_block(fidblk, sector_size,
806 0, len, sectornr);
807 sectornr++;
808 bpos += len;
809 buf_len -= len;
810 }
811 #endif
812 }
813 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
814
815 /* copy parts into the bufs and set for writing */
816 buf_len = buf->b_bcount;
817 sectornr = our_sectornr;
818 eccline = udf_geteccline(ump, sectornr, 0);
819 eccsect = sectornr - eccline->start_sector;
820 bpos = 0;
821 while (buf_len) {
822 len = MIN(buf_len, sector_size);
823 if (eccsect == ump->packet_size) {
824 udf_puteccline(eccline);
825 eccline = udf_geteccline(ump, sectornr, 0);
826 eccsect = sectornr - eccline->start_sector;
827 }
828 bit = (uint64_t) 1 << eccsect;
829 KASSERT((eccline->readin & bit) == 0);
830 eccline->present |= bit;
831 eccline->dirty |= bit;
832 if (eccline->bufs[eccsect]) {
833 /* old callback still pending */
834 nestiobuf_done(eccline->bufs[eccsect],
835 eccline->bufs_len[eccsect],
836 0);
837 eccline->bufs[eccsect] = NULL;
838 }
839
840 src = (uint8_t *) buf->b_data + bpos;
841 dst = (uint8_t *) eccline->blob + eccsect * sector_size;
842 if (len != sector_size)
843 memset(dst, 0, sector_size);
844 memcpy(dst, src, len);
845
846 /* note that its finished for this extent */
847 eccline->bufs[eccsect] = NULL;
848 nestiobuf_done(buf, len, 0);
849
850 bpos += sector_size;
851 eccsect++;
852 sectornr++;
853 buf_len -= len;
854 }
855 udf_puteccline(eccline);
856 return;
857
858 }
859
860 /* sequential writing */
861 KASSERT(queue == UDF_SHED_SEQWRITING);
862 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw SEQWRITE %p : sector XXXX "
863 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
864 buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
865 buf->b_bufsize));
866 /*
867 * Buffers should not have been allocated to disc addresses yet on
868 * this queue. Note that a buffer can get multiple extents allocated.
869 * Note that it *looks* like the normal writing but its different in
870 * the details.
871 *
872 * lmapping contains lb_num relative to base partition.
873 *
874 * XXX should we try to claim/organize the allocated memory to
875 * block-aligned pieces?
876 */
877 mutex_enter(&priv->seqwrite_mutex);
878
879 lmapping = ump->la_lmapping;
880 node_ad_cpy = ump->la_node_ad_cpy;
881
882 /* logically allocate buf and map it in the file */
883 udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
884
885 /* if we have FIDs, fixup using the new allocation table */
886 if (buf->b_udf_c_type == UDF_C_FIDS) {
887 buf_len = buf->b_bcount;
888 bpos = 0;
889 lmappos = lmapping;
890 while (buf_len) {
891 sectornr = *lmappos++;
892 len = MIN(buf_len, sector_size);
893 fidblk = (uint8_t *) buf->b_data + bpos;
894 udf_fixup_fid_block(fidblk, sector_size,
895 0, len, sectornr);
896 bpos += len;
897 buf_len -= len;
898 }
899 }
900 if (buf->b_udf_c_type == UDF_C_METADATA_SBM) {
901 if (buf->b_lblkno == 0) {
902 /* update the tag location inside */
903 tag = (struct desc_tag *) buf->b_data;
904 tag->tag_loc = udf_rw32(*lmapping);
905 udf_validate_tag_and_crc_sums(buf->b_data);
906 }
907 }
908 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
909
910 /*
911 * Translate new mappings in lmapping to pmappings.
912 * pmapping to contain lb_nums as used for disc adressing.
913 */
914 pmapping = ump->la_pmapping;
915 sectors = (buf->b_bcount + sector_size -1) / sector_size;
916 udf_translate_vtop_list(ump, sectors, vpart_num, lmapping, pmapping);
917
918 /* copy parts into the bufs and set for writing */
919 pmappos = pmapping;
920 buf_len = buf->b_bcount;
921 sectornr = *pmappos++;
922 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
923 eccsect = sectornr - eccline->start_sector;
924 bpos = 0;
925 while (buf_len) {
926 len = MIN(buf_len, sector_size);
927 eccsect = sectornr - eccline->start_sector;
928 if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
929 eccline->flags |= ECC_SEQWRITING;
930 udf_puteccline(eccline);
931 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
932 eccsect = sectornr - eccline->start_sector;
933 }
934 bit = (uint64_t) 1 << eccsect;
935 KASSERT((eccline->readin & bit) == 0);
936 eccline->present |= bit;
937 eccline->dirty |= bit;
938 eccline->bufs[eccsect] = NULL;
939
940 src = (uint8_t *) buf->b_data + bpos;
941 dst = (uint8_t *)
942 eccline->blob + eccsect * sector_size;
943 if (len != sector_size)
944 memset(dst, 0, sector_size);
945 memcpy(dst, src, len);
946
947 /* note that its finished for this extent */
948 nestiobuf_done(buf, len, 0);
949
950 bpos += sector_size;
951 sectornr = *pmappos++;
952 buf_len -= len;
953 }
954 eccline->flags |= ECC_SEQWRITING;
955 udf_puteccline(eccline);
956 mutex_exit(&priv->seqwrite_mutex);
957 }
958
959 /* --------------------------------------------------------------------- */
960
961 static void
962 udf_shedule_read_callback(struct buf *buf)
963 {
964 struct udf_eccline *eccline = BTOE(buf);
965 struct udf_mount *ump = eccline->ump;
966 uint64_t bit;
967 uint8_t *src, *dst;
968 int sector_size = ump->discinfo.sector_size;
969 int error, i, len;
970
971 DPRINTF(ECCLINE, ("read callback called\n"));
972 /* post process read action */
973 error = buf->b_error;
974 for (i = 0; i < ump->packet_size; i++) {
975 bit = (uint64_t) 1 << i;
976 src = (uint8_t *) buf->b_data + i * sector_size;
977 dst = (uint8_t *) eccline->blob + i * sector_size;
978 if (eccline->present & bit)
979 continue;
980 eccline->present |= bit;
981 if (error)
982 eccline->error |= bit;
983 if (eccline->bufs[i]) {
984 dst = (uint8_t *) eccline->bufs[i]->b_data +
985 eccline->bufs_bpos[i];
986 len = eccline->bufs_len[i];
987 if (!error)
988 memcpy(dst, src, len);
989 nestiobuf_done(eccline->bufs[i], len, error);
990 eccline->bufs[i] = NULL;
991 }
992
993 }
994 KASSERT(buf->b_data == eccline->blob);
995 KASSERT(eccline->present == ((uint64_t) 1 << ump->packet_size)-1);
996
997 /*
998 * XXX TODO what to do on read errors? read in all sectors
999 * synchronously and allocate a sparable entry?
1000 */
1001
1002 udf_puteccline(eccline);
1003 DPRINTF(ECCLINE, ("read callback finished\n"));
1004 }
1005
1006
1007 static void
1008 udf_shedule_write_callback(struct buf *buf)
1009 {
1010 struct udf_eccline *eccline = BTOE(buf);
1011 struct udf_mount *ump = eccline->ump;
1012 uint64_t bit;
1013 int error, i, len;
1014
1015 DPRINTF(ECCLINE, ("write callback called\n"));
1016 /* post process write action */
1017 error = buf->b_error;
1018 for (i = 0; i < ump->packet_size; i++) {
1019 bit = (uint64_t) 1 << i;
1020 if ((eccline->dirty & bit) == 0)
1021 continue;
1022 if (error) {
1023 eccline->error |= bit;
1024 } else {
1025 eccline->dirty &= ~bit;
1026 }
1027 if (eccline->bufs[i]) {
1028 len = eccline->bufs_len[i];
1029 nestiobuf_done(eccline->bufs[i], len, error);
1030 eccline->bufs[i] = NULL;
1031 }
1032 }
1033 KASSERT(eccline->dirty == 0);
1034
1035 KASSERT(error == 0);
1036 /*
1037 * XXX TODO on write errors allocate a sparable entry and reissue
1038 */
1039
1040 udf_puteccline(eccline);
1041 }
1042
1043
1044 static void
1045 udf_issue_eccline(struct udf_eccline *eccline, int queued_on)
1046 {
1047 struct udf_mount *ump = eccline->ump;
1048 struct strat_private *priv = PRIV(ump);
1049 struct buf *buf, *nestbuf;
1050 uint64_t bit, allbits = ((uint64_t) 1 << ump->packet_size)-1;
1051 uint32_t start;
1052 int sector_size = ump->discinfo.sector_size;
1053 int blks = sector_size / DEV_BSIZE;
1054 int i;
1055
1056 if (queued_on == UDF_SHED_READING) {
1057 DPRINTF(SHEDULE, ("udf_issue_eccline reading : "));
1058 /* read all bits that are not yet present */
1059 eccline->readin = (~eccline->present) & allbits;
1060 KASSERT(eccline->readin);
1061 start = eccline->start_sector;
1062 buf = eccline->buf;
1063 buf->b_flags = B_READ | B_ASYNC;
1064 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1065 buf->b_oflags = 0;
1066 buf->b_iodone = udf_shedule_read_callback;
1067 buf->b_data = eccline->blob;
1068 buf->b_bcount = ump->packet_size * sector_size;
1069 buf->b_resid = buf->b_bcount;
1070 buf->b_bufsize = buf->b_bcount;
1071 buf->b_private = eccline;
1072 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1073 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1074 buf->b_proc = NULL;
1075
1076 if (eccline->present != 0) {
1077 for (i = 0; i < ump->packet_size; i++) {
1078 bit = (uint64_t) 1 << i;
1079 if (eccline->present & bit) {
1080 nestiobuf_done(buf, sector_size, 0);
1081 continue;
1082 }
1083 nestbuf = getiobuf(NULL, true);
1084 nestiobuf_setup(buf, nestbuf, i * sector_size,
1085 sector_size);
1086 /* adjust blocknumber to read */
1087 nestbuf->b_blkno = buf->b_blkno + i*blks;
1088 nestbuf->b_rawblkno = buf->b_rawblkno + i*blks;
1089
1090 DPRINTF(SHEDULE, ("sector %d ",
1091 start + i));
1092 /* call asynchronous */
1093 VOP_STRATEGY(ump->devvp, nestbuf);
1094 }
1095 DPRINTF(SHEDULE, ("\n"));
1096 return;
1097 }
1098 } else {
1099 /* write or seqwrite */
1100 DPRINTF(SHEDULE, ("udf_issue_eccline writing or seqwriting : "));
1101 DPRINTF(SHEDULE, ("\n\tpresent %"PRIx64", readin %"PRIx64", "
1102 "dirty %"PRIx64"\n\t", eccline->present, eccline->readin,
1103 eccline->dirty));
1104 if (eccline->present != allbits) {
1105 /* requeue to read-only */
1106 DPRINTF(SHEDULE, ("\n\t-> not complete, requeue to "
1107 "reading\n"));
1108 udf_push_eccline(eccline, UDF_SHED_READING);
1109 return;
1110 }
1111 start = eccline->start_sector;
1112 buf = eccline->buf;
1113 buf->b_flags = B_WRITE | B_ASYNC;
1114 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1115 buf->b_oflags = 0;
1116 buf->b_iodone = udf_shedule_write_callback;
1117 buf->b_data = eccline->blob;
1118 buf->b_bcount = ump->packet_size * sector_size;
1119 buf->b_resid = buf->b_bcount;
1120 buf->b_bufsize = buf->b_bcount;
1121 buf->b_private = eccline;
1122 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1123 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1124 buf->b_proc = NULL;
1125 }
1126
1127 mutex_exit(&priv->discstrat_mutex);
1128 /* call asynchronous */
1129 DPRINTF(SHEDULE, ("sector %d for %d\n",
1130 start, ump->packet_size));
1131 VOP_STRATEGY(ump->devvp, buf);
1132 mutex_enter(&priv->discstrat_mutex);
1133 }
1134
1135
1136 static void
1137 udf_discstrat_thread(void *arg)
1138 {
1139 struct udf_mount *ump = (struct udf_mount *) arg;
1140 struct strat_private *priv = PRIV(ump);
1141 struct udf_eccline *eccline;
1142 struct timespec now, *last;
1143 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
1144 int new_queue, wait, work, num, cnt;
1145
1146 work = 1;
1147 priv->thread_running = 1;
1148 mutex_enter(&priv->discstrat_mutex);
1149 priv->num_floating = 0;
1150 while (priv->run_thread || work || priv->num_floating) {
1151 /* get our time */
1152 vfs_timestamp(&now);
1153
1154 /* maintenance: handle eccline state machine */
1155 num = priv->num_queued[UDF_SHED_WAITING];
1156 cnt = 0;
1157 while (cnt < num) {
1158 eccline = udf_pop_eccline(priv, UDF_SHED_WAITING);
1159 /* requeue */
1160 new_queue = UDF_SHED_FREE;
1161 if (eccline->refcnt > 0)
1162 new_queue = UDF_SHED_IDLE;
1163 if (eccline->flags & ECC_WANTED)
1164 new_queue = UDF_SHED_IDLE;
1165 if (eccline->readin)
1166 new_queue = UDF_SHED_READING;
1167 if (eccline->dirty) {
1168 new_queue = UDF_SHED_WAITING;
1169 if ((eccline->wait_time.tv_sec - now.tv_sec <= 0) ||
1170 ((eccline->present == allbits) &&
1171 (eccline->flags & ECC_SEQWRITING)))
1172 {
1173 new_queue = UDF_SHED_WRITING;
1174 if (eccline->flags & ECC_SEQWRITING)
1175 new_queue = UDF_SHED_SEQWRITING;
1176 if (eccline->present != allbits)
1177 new_queue = UDF_SHED_READING;
1178 }
1179 }
1180 udf_push_eccline(eccline, new_queue);
1181 cnt++;
1182 }
1183
1184 /* maintenance: free exess ecclines */
1185 while (priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE) {
1186 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1187 KASSERT(eccline);
1188 KASSERT(eccline->refcnt == 0);
1189 if (eccline->flags & ECC_WANTED) {
1190 udf_push_eccline(eccline, UDF_SHED_IDLE);
1191 DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n"));
1192 } else {
1193 DPRINTF(ECCLINE, ("Removing entry from free list\n"));
1194 udf_dispose_eccline(eccline);
1195 }
1196 }
1197
1198 /* process the current selected queue */
1199 /* get our time */
1200 vfs_timestamp(&now);
1201 last = &priv->last_queued[priv->cur_queue];
1202
1203 /* get our line */
1204 eccline = udf_pop_eccline(priv, priv->cur_queue);
1205 if (eccline) {
1206 wait = 0;
1207 new_queue = priv->cur_queue;
1208 DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n"));
1209
1210 /* complete the `get' by locking and refcounting it */
1211 UDF_LOCK_ECCLINE(eccline);
1212 eccline->refcnt++;
1213
1214 udf_issue_eccline(eccline, priv->cur_queue);
1215 } else {
1216 /* don't switch too quickly */
1217 if (now.tv_sec - last->tv_sec < 2) {
1218 /* wait some time */
1219 cv_timedwait(&priv->discstrat_cv,
1220 &priv->discstrat_mutex, hz);
1221 /* we assume there is work to be done */
1222 work = 1;
1223 continue;
1224 }
1225
1226 /* XXX select on queue lengths ? */
1227 wait = 1;
1228 /* check if we can/should switch */
1229 new_queue = priv->cur_queue;
1230 if (bufq_peek(priv->queues[UDF_SHED_READING]))
1231 new_queue = UDF_SHED_READING;
1232 if (bufq_peek(priv->queues[UDF_SHED_WRITING]))
1233 new_queue = UDF_SHED_WRITING;
1234 if (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]))
1235 new_queue = UDF_SHED_SEQWRITING;
1236 }
1237
1238 /* give room */
1239 mutex_exit(&priv->discstrat_mutex);
1240
1241 if (new_queue != priv->cur_queue) {
1242 wait = 0;
1243 DPRINTF(SHEDULE, ("switching from %d to %d\n",
1244 priv->cur_queue, new_queue));
1245 priv->cur_queue = new_queue;
1246 }
1247 mutex_enter(&priv->discstrat_mutex);
1248
1249 /* wait for more if needed */
1250 if (wait)
1251 cv_timedwait(&priv->discstrat_cv,
1252 &priv->discstrat_mutex, hz/4); /* /8 */
1253
1254 work = (bufq_peek(priv->queues[UDF_SHED_WAITING]) != NULL);
1255 work |= (bufq_peek(priv->queues[UDF_SHED_READING]) != NULL);
1256 work |= (bufq_peek(priv->queues[UDF_SHED_WRITING]) != NULL);
1257 work |= (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) != NULL);
1258
1259 DPRINTF(PARANOIA, ("work : (%d, %d, %d) -> work %d, float %d\n",
1260 (bufq_peek(priv->queues[UDF_SHED_READING]) != NULL),
1261 (bufq_peek(priv->queues[UDF_SHED_WRITING]) != NULL),
1262 (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) != NULL),
1263 work, priv->num_floating));
1264 }
1265
1266 mutex_exit(&priv->discstrat_mutex);
1267
1268 /* tear down remaining ecclines */
1269 mutex_enter(&priv->discstrat_mutex);
1270 KASSERT(priv->num_queued[UDF_SHED_WAITING] == 0);
1271 KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0);
1272 KASSERT(priv->num_queued[UDF_SHED_READING] == 0);
1273 KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0);
1274 KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0);
1275
1276 KASSERT(bufq_peek(priv->queues[UDF_SHED_WAITING]) == NULL);
1277 KASSERT(bufq_peek(priv->queues[UDF_SHED_IDLE]) == NULL);
1278 KASSERT(bufq_peek(priv->queues[UDF_SHED_READING]) == NULL);
1279 KASSERT(bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL);
1280 KASSERT(bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
1281 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1282 while (eccline) {
1283 udf_dispose_eccline(eccline);
1284 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1285 }
1286 KASSERT(priv->num_queued[UDF_SHED_FREE] == 0);
1287 mutex_exit(&priv->discstrat_mutex);
1288
1289 priv->thread_running = 0;
1290 priv->thread_finished = 1;
1291 wakeup(&priv->run_thread);
1292 kthread_exit(0);
1293 /* not reached */
1294 }
1295
1296 /* --------------------------------------------------------------------- */
1297
1298 /*
1299 * Buffer memory pool allocator.
1300 */
1301
1302 static void *
1303 ecclinepool_page_alloc(struct pool *pp, int flags)
1304 {
1305 return (void *)uvm_km_alloc(kernel_map,
1306 MAXBSIZE, MAXBSIZE,
1307 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
1308 | UVM_KMF_WIRED /* UVM_KMF_PAGABLE? */);
1309 }
1310
1311 static void
1312 ecclinepool_page_free(struct pool *pp, void *v)
1313 {
1314 uvm_km_free(kernel_map, (vaddr_t)v, MAXBSIZE, UVM_KMF_WIRED);
1315 }
1316
1317 static struct pool_allocator ecclinepool_allocator = {
1318 .pa_alloc = ecclinepool_page_alloc,
1319 .pa_free = ecclinepool_page_free,
1320 .pa_pagesz = MAXBSIZE,
1321 };
1322
1323
1324 static void
1325 udf_discstrat_init_rmw(struct udf_strat_args *args)
1326 {
1327 struct udf_mount *ump = args->ump;
1328 struct strat_private *priv = PRIV(ump);
1329 uint32_t lb_size, blobsize, hashline;
1330 int i;
1331
1332 KASSERT(ump);
1333 KASSERT(ump->logical_vol);
1334 KASSERT(priv == NULL);
1335
1336 lb_size = udf_rw32(ump->logical_vol->lb_size);
1337 blobsize = ump->packet_size * lb_size;
1338 KASSERT(lb_size > 0);
1339 KASSERT(ump->packet_size <= 64);
1340
1341 /* initialise our memory space */
1342 ump->strategy_private = malloc(sizeof(struct strat_private),
1343 M_UDFTEMP, M_WAITOK);
1344 priv = ump->strategy_private;
1345 memset(priv, 0 , sizeof(struct strat_private));
1346
1347 /* initialise locks */
1348 cv_init(&priv->discstrat_cv, "udfstrat");
1349 mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE);
1350 mutex_init(&priv->seqwrite_mutex, MUTEX_DEFAULT, IPL_NONE);
1351
1352 /* initialise struct eccline pool */
1353 pool_init(&priv->eccline_pool, sizeof(struct udf_eccline),
1354 0, 0, 0, "udf_eccline_pool", NULL, IPL_NONE);
1355
1356 /* initialise eccline blob pool */
1357 ecclinepool_allocator.pa_pagesz = blobsize;
1358 pool_init(&priv->ecclineblob_pool, blobsize,
1359 0, 0, 0, "udf_eccline_blob", &ecclinepool_allocator, IPL_NONE);
1360
1361 /* initialise main queues */
1362 for (i = 0; i < UDF_SHED_MAX; i++) {
1363 priv->num_queued[i] = 0;
1364 vfs_timestamp(&priv->last_queued[i]);
1365 }
1366 bufq_alloc(&priv->queues[UDF_SHED_WAITING], "fcfs",
1367 BUFQ_SORT_RAWBLOCK);
1368 bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
1369 BUFQ_SORT_RAWBLOCK);
1370 bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
1371 BUFQ_SORT_RAWBLOCK);
1372 bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "disksort", 0);
1373
1374 /* initialise administrative queues */
1375 bufq_alloc(&priv->queues[UDF_SHED_IDLE], "fcfs", 0);
1376 bufq_alloc(&priv->queues[UDF_SHED_FREE], "fcfs", 0);
1377
1378 for (hashline = 0; hashline < UDF_ECCBUF_HASHSIZE; hashline++) {
1379 LIST_INIT(&priv->eccline_hash[hashline]);
1380 }
1381
1382 /* create our disk strategy thread */
1383 priv->cur_queue = UDF_SHED_READING;
1384 priv->thread_finished = 0;
1385 priv->thread_running = 0;
1386 priv->run_thread = 1;
1387 if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
1388 udf_discstrat_thread, ump, &priv->queue_lwp,
1389 "%s", "udf_rw")) {
1390 panic("fork udf_rw");
1391 }
1392
1393 /* wait for thread to spin up */
1394 while (!priv->thread_running) {
1395 tsleep(&priv->thread_running, PRIBIO+1, "udfshedstart", hz);
1396 }
1397 }
1398
1399
1400 static void
1401 udf_discstrat_finish_rmw(struct udf_strat_args *args)
1402 {
1403 struct udf_mount *ump = args->ump;
1404 struct strat_private *priv = PRIV(ump);
1405 int error;
1406
1407 if (ump == NULL)
1408 return;
1409
1410 /* stop our sheduling thread */
1411 KASSERT(priv->run_thread == 1);
1412 priv->run_thread = 0;
1413 wakeup(priv->queue_lwp);
1414 while (!priv->thread_finished) {
1415 error = tsleep(&priv->run_thread, PRIBIO+1,
1416 "udfshedfin", hz);
1417 }
1418 /* kthread should be finished now */
1419
1420 /* cleanup our pools */
1421 pool_destroy(&priv->eccline_pool);
1422 pool_destroy(&priv->ecclineblob_pool);
1423
1424 cv_destroy(&priv->discstrat_cv);
1425 mutex_destroy(&priv->discstrat_mutex);
1426 mutex_destroy(&priv->seqwrite_mutex);
1427
1428 /* free our private space */
1429 free(ump->strategy_private, M_UDFTEMP);
1430 ump->strategy_private = NULL;
1431 }
1432
1433 /* --------------------------------------------------------------------- */
1434
1435 struct udf_strategy udf_strat_rmw =
1436 {
1437 udf_create_nodedscr_rmw,
1438 udf_free_nodedscr_rmw,
1439 udf_read_nodedscr_rmw,
1440 udf_write_nodedscr_rmw,
1441 udf_queuebuf_rmw,
1442 udf_discstrat_init_rmw,
1443 udf_discstrat_finish_rmw
1444 };
1445
1446