Home | History | Annotate | Line # | Download | only in udf
udf_strat_rmw.c revision 1.9.4.1
      1 /* $NetBSD: udf_strat_rmw.c,v 1.9.4.1 2008/11/02 22:56:06 snj Exp $ */
      2 
      3 /*
      4  * Copyright (c) 2006, 2008 Reinoud Zandijk
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  *
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #ifndef lint
     31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.9.4.1 2008/11/02 22:56:06 snj Exp $");
     32 #endif /* not lint */
     33 
     34 
     35 #if defined(_KERNEL_OPT)
     36 #include "opt_quota.h"
     37 #include "opt_compat_netbsd.h"
     38 #endif
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/sysctl.h>
     43 #include <sys/namei.h>
     44 #include <sys/proc.h>
     45 #include <sys/kernel.h>
     46 #include <sys/vnode.h>
     47 #include <miscfs/genfs/genfs_node.h>
     48 #include <sys/mount.h>
     49 #include <sys/buf.h>
     50 #include <sys/file.h>
     51 #include <sys/device.h>
     52 #include <sys/disklabel.h>
     53 #include <sys/ioctl.h>
     54 #include <sys/malloc.h>
     55 #include <sys/dirent.h>
     56 #include <sys/stat.h>
     57 #include <sys/conf.h>
     58 #include <sys/kauth.h>
     59 #include <sys/kthread.h>
     60 #include <dev/clock_subr.h>
     61 
     62 #include <fs/udf/ecma167-udf.h>
     63 #include <fs/udf/udf_mount.h>
     64 
     65 #include "udf.h"
     66 #include "udf_subr.h"
     67 #include "udf_bswap.h"
     68 
     69 
/* casts from generic kernel objects to the private types used in this file */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
#define BTOE(buf) ((struct udf_eccline *) ((buf)->b_private))

/* --------------------------------------------------------------------- */

/*
 * Upper bound on the number of sectors in one eccline; it sizes the
 * per-sector arrays in struct udf_eccline and equals the width of its
 * uint64_t per-sector bitmaps.
 */
#define UDF_MAX_PACKET_SIZE	64			/* DONT change this */

/*
 * scheduler states; also used as index into the per-queue arrays of
 * struct strat_private. A queued_on value of 0 means `on no queue'.
 */
#define UDF_SHED_MAX		6
#define UDF_SHED_READING	1
#define UDF_SHED_WRITING	2
#define UDF_SHED_SEQWRITING	3
#define UDF_SHED_IDLE		4			/* resting */
#define UDF_SHED_FREE		5			/* recyclable */

/* flags; kept in udf_eccline.flags */
#define ECC_LOCKED		0x01			/* prevent access   */
#define ECC_WANTED		0x02			/* trying access    */
#define ECC_SEQWRITING		0x04			/* sequential queue */
#define ECC_FLOATING		0x08			/* not queued yet   */
     91 
     92 
TAILQ_HEAD(ecclineq, udf_eccline);
/*
 * One cached `eccline': a run of ump->packet_size consecutive sectors that
 * starts at start_sector. The four bitmaps carry one bit per sector of the
 * line, bit n describing sector start_sector + n.
 */
struct udf_eccline {
	struct udf_mount	 *ump;
	uint64_t		  present;		/* preserve these */
	uint64_t		  readin;		/* bitmap */
	uint64_t		  dirty;		/* bitmap */
	uint64_t		  error;		/* bitmap */
	uint32_t		  refcnt;

	uint32_t		  flags;		/* ECC_* flags */
	uint32_t		  start_sector;		/* physical */

	struct buf		 *buf;			/* handle placed on the bufqs; b_private points back here */
	void			 *blob;			/* sector data of the line */

	/* per sector: nested buf parked on it, offset into that buf, length */
	struct buf		 *bufs[UDF_MAX_PACKET_SIZE];
	uint32_t		  bufs_bpos[UDF_MAX_PACKET_SIZE];
	int			  bufs_len[UDF_MAX_PACKET_SIZE];

	int			  queued_on;		/* on which BUFQ list */
	LIST_ENTRY(udf_eccline)   hashchain;		/* on sector lookup  */
};
    115 
    116 
/*
 * Per-mount private state of this strategy; reached through PRIV(ump).
 * The arrays sized UDF_SHED_MAX are indexed by the UDF_SHED_* queue number.
 */
struct strat_private {
	lwp_t			 *queue_lwp;
	kcondvar_t		  discstrat_cv;		/* to wait on       */
	kmutex_t		  discstrat_mutex;	/* disc strategy    */
	kmutex_t		  seqwrite_mutex;	/* protect mappings */

	int			  run_thread;		/* thread control */
	int			  thread_finished;	/* thread control */
	int			  cur_queue;

	int			  num_floating;		/* ecclines flagged ECC_FLOATING */
	int			  num_queued[UDF_SHED_MAX];	/* ecclines on each queue */
	struct bufq_state	 *queues[UDF_SHED_MAX];
	struct timespec		  last_queued[UDF_SHED_MAX];	/* time of last push per queue */
	struct disk_strategy	  old_strategy_setting;

	struct pool		  eccline_pool;		/* struct udf_eccline allocations */
	struct pool		  ecclineblob_pool;	/* eccline data blobs */
	LIST_HEAD(, udf_eccline)  eccline_hash[UDF_ECCBUF_HASHSIZE];	/* lookup by start sector */
};
    137 
    138 /* --------------------------------------------------------------------- */
    139 
/* wrappers around the eccline lock/unlock routines defined below */
#define UDF_LOCK_ECCLINE(eccline) udf_lock_eccline(eccline)
#define UDF_UNLOCK_ECCLINE(eccline) udf_unlock_eccline(eccline)
    142 
    143 /* can be called with or without discstrat lock */
    144 static void
    145 udf_lock_eccline(struct udf_eccline *eccline)
    146 {
    147 	struct strat_private *priv = PRIV(eccline->ump);
    148 	int waslocked, ret;
    149 
    150 	waslocked = mutex_owned(&priv->discstrat_mutex);
    151 	if (!waslocked)
    152 		mutex_enter(&priv->discstrat_mutex);
    153 
    154 	/* wait until its unlocked first */
    155 	while (eccline->flags & ECC_LOCKED) {
    156 		eccline->flags |= ECC_WANTED;
    157 		ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex,
    158 			hz/8);
    159 		if (ret == EWOULDBLOCK)
    160 			DPRINTF(LOCKING, ("eccline lock helt, waiting for "
    161 				"release"));
    162 	}
    163 	eccline->flags |= ECC_LOCKED;
    164 	eccline->flags &= ~ECC_WANTED;
    165 
    166 	if (!waslocked)
    167 		mutex_exit(&priv->discstrat_mutex);
    168 }
    169 
    170 
    171 /* can be called with or without discstrat lock */
    172 static void
    173 udf_unlock_eccline(struct udf_eccline *eccline)
    174 {
    175 	struct strat_private *priv = PRIV(eccline->ump);
    176 	int waslocked;
    177 
    178 	KASSERT(mutex_owned(&priv->discstrat_mutex));
    179 
    180 	waslocked = mutex_owned(&priv->discstrat_mutex);
    181 	if (!waslocked)
    182 		mutex_enter(&priv->discstrat_mutex);
    183 
    184 	eccline->flags &= ~ECC_LOCKED;
    185 	cv_broadcast(&priv->discstrat_cv);
    186 
    187 	if (!waslocked)
    188 		mutex_exit(&priv->discstrat_mutex);
    189 }
    190 
    191 
    192 /* NOTE discstrat_mutex should be held! */
    193 static void
    194 udf_dispose_eccline(struct udf_eccline *eccline)
    195 {
    196 	struct strat_private *priv = PRIV(eccline->ump);
    197 	struct buf *ret;
    198 
    199 	KASSERT(mutex_owned(&priv->discstrat_mutex));
    200 
    201 	KASSERT(eccline->refcnt == 0);
    202 	KASSERT(eccline->dirty  == 0);
    203 
    204 	DPRINTF(ECCLINE, ("dispose eccline with start sector %d, "
    205 		"present %0"PRIx64"\n", eccline->start_sector,
    206 		eccline->present));
    207 
    208 	if (eccline->queued_on) {
    209 		ret = BUFQ_CANCEL(priv->queues[eccline->queued_on], eccline->buf);
    210 		KASSERT(ret == eccline->buf);
    211 		priv->num_queued[eccline->queued_on]--;
    212 	}
    213 	LIST_REMOVE(eccline, hashchain);
    214 
    215 	if (eccline->flags & ECC_FLOATING) {
    216 		eccline->flags &= ~ECC_FLOATING;
    217 		priv->num_floating--;
    218 	}
    219 
    220 	putiobuf(eccline->buf);
    221 	pool_put(&priv->ecclineblob_pool, eccline->blob);
    222 	pool_put(&priv->eccline_pool, eccline);
    223 }
    224 
    225 
    226 /* NOTE discstrat_mutex should be held! */
    227 static void
    228 udf_push_eccline(struct udf_eccline *eccline, int newqueue)
    229 {
    230 	struct strat_private *priv = PRIV(eccline->ump);
    231 	struct buf *ret;
    232 	int curqueue;
    233 
    234 	KASSERT(mutex_owned(&priv->discstrat_mutex));
    235 
    236 	DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue));
    237 
    238 	/* requeue */
    239 	curqueue = eccline->queued_on;
    240 	if (curqueue) {
    241 		ret = BUFQ_CANCEL(priv->queues[curqueue], eccline->buf);
    242 
    243 		DPRINTF(PARANOIA, ("push_eccline BUFQ_CANCEL returned %p when "
    244 			"requested to remove %p from queue %d\n", ret,
    245 			eccline->buf, curqueue));
    246 #ifdef DIAGNOSTIC
    247 		if (ret == NULL) {
    248 			int i;
    249 
    250 			printf("udf_push_eccline: bufq_cancel can't find "
    251 				"buffer; dumping queues\n");
    252 			for (i = 1; i < UDF_SHED_MAX; i++) {
    253 				printf("queue %d\n\t", i);
    254 				ret = BUFQ_GET(priv->queues[i]);
    255 				while (ret) {
    256 					printf("%p ", ret);
    257 					if (ret == eccline->buf)
    258 						printf("[<-] ");
    259 					ret = BUFQ_GET(priv->queues[i]);
    260 				}
    261 				printf("\n");
    262 			}
    263 			panic("fatal queue bug; exit");
    264 		}
    265 #endif
    266 
    267 		KASSERT(ret == eccline->buf);
    268 		priv->num_queued[curqueue]--;
    269 	}
    270 
    271 	/* set buffer block numbers to make sure its queued correctly */
    272 	eccline->buf->b_lblkno   = eccline->start_sector;
    273 	eccline->buf->b_blkno    = eccline->start_sector;
    274 	eccline->buf->b_rawblkno = eccline->start_sector;
    275 
    276 	BUFQ_PUT(priv->queues[newqueue], eccline->buf);
    277 	eccline->queued_on = newqueue;
    278 	priv->num_queued[newqueue]++;
    279 	vfs_timestamp(&priv->last_queued[newqueue]);
    280 
    281 	if (eccline->flags & ECC_FLOATING) {
    282 		eccline->flags &= ~ECC_FLOATING;
    283 		priv->num_floating--;
    284 	}
    285 
    286 	if ((newqueue != UDF_SHED_FREE) && (newqueue != UDF_SHED_IDLE))
    287 		cv_signal(&priv->discstrat_cv);
    288 }
    289 
    290 
    291 static struct udf_eccline *
    292 udf_pop_eccline(struct strat_private *priv, int queued_on)
    293 {
    294 	struct udf_eccline *eccline;
    295 	struct buf *buf;
    296 
    297 	KASSERT(mutex_owned(&priv->discstrat_mutex));
    298 
    299 	buf = BUFQ_GET(priv->queues[queued_on]);
    300 	if (!buf) {
    301 		KASSERT(priv->num_queued[queued_on] == 0);
    302 		return NULL;
    303 	}
    304 
    305 	eccline = BTOE(buf);
    306 	KASSERT(eccline->queued_on == queued_on);
    307 	eccline->queued_on = 0;
    308 	priv->num_queued[queued_on]--;
    309 
    310 	if (eccline->flags & ECC_FLOATING)
    311 		panic("popping already marked floating eccline");
    312 	eccline->flags |= ECC_FLOATING;
    313 	priv->num_floating++;
    314 
    315 	DPRINTF(PARANOIA, ("DEBUG: buf %p popped from queue %d\n",
    316 		eccline->buf, queued_on));
    317 
    318 	return eccline;
    319 }
    320 
    321 
    322 static struct udf_eccline *
    323 udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags)
    324 {
    325 	struct strat_private *priv = PRIV(ump);
    326 	struct udf_eccline *eccline;
    327 	uint32_t start_sector, lb_size, blobsize;
    328 	uint8_t *eccline_blob;
    329 	int line, line_offset;
    330 	int num_busy, ret;
    331 
    332 	line_offset  = sector % ump->packet_size;
    333 	start_sector = sector - line_offset;
    334 	line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK;
    335 
    336 	mutex_enter(&priv->discstrat_mutex);
    337 
    338 retry:
    339 	DPRINTF(ECCLINE, ("get line sector %d, line %d\n", sector, line));
    340 	LIST_FOREACH(eccline, &priv->eccline_hash[line], hashchain) {
    341 		if (eccline->start_sector == start_sector) {
    342 			DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n",
    343 				eccline->start_sector));
    344 
    345 			UDF_LOCK_ECCLINE(eccline);
    346 			/* move from freelist (!) */
    347 			if (eccline->queued_on == UDF_SHED_FREE) {
    348 				DPRINTF(ECCLINE, ("was on freelist\n"));
    349 				KASSERT(eccline->refcnt == 0);
    350 				udf_push_eccline(eccline, UDF_SHED_IDLE);
    351 			}
    352 			eccline->refcnt++;
    353 			mutex_exit(&priv->discstrat_mutex);
    354 			return eccline;
    355 		}
    356 	}
    357 
    358 	DPRINTF(ECCLINE, ("\tnot found in eccline cache\n"));
    359 	/* not found in eccline cache */
    360 
    361 	lb_size  = udf_rw32(ump->logical_vol->lb_size);
    362 	blobsize = ump->packet_size * lb_size;
    363 
    364 	/* dont allow too many pending requests */
    365 	DPRINTF(ECCLINE, ("\tallocating new eccline\n"));
    366 	num_busy = (priv->num_queued[UDF_SHED_SEQWRITING] + priv->num_floating);
    367 	if ((flags & ECC_SEQWRITING) && (num_busy > UDF_ECCLINE_MAXBUSY)) {
    368 		ret = cv_timedwait(&priv->discstrat_cv,
    369 			&priv->discstrat_mutex, hz/8);
    370 		goto retry;
    371 	}
    372 
    373 	eccline_blob = pool_get(&priv->ecclineblob_pool, PR_NOWAIT);
    374 	eccline = pool_get(&priv->eccline_pool, PR_NOWAIT);
    375 	if ((eccline_blob == NULL) || (eccline == NULL)) {
    376 		if (eccline_blob)
    377 			pool_put(&priv->ecclineblob_pool, eccline_blob);
    378 		if (eccline)
    379 			pool_put(&priv->eccline_pool, eccline);
    380 
    381 		/* out of memory for now; canibalise freelist */
    382 		eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
    383 		if (eccline == NULL) {
    384 			/* serious trouble; wait and retry */
    385 			cv_timedwait(&priv->discstrat_cv,
    386 				&priv->discstrat_mutex, hz/8);
    387 			goto retry;
    388 		}
    389 		/* push back line if we're waiting for it */
    390 		if (eccline->flags & ECC_WANTED) {
    391 			udf_push_eccline(eccline, UDF_SHED_IDLE);
    392 			goto retry;
    393 		}
    394 
    395 		/* unlink this entry */
    396 		LIST_REMOVE(eccline, hashchain);
    397 
    398 		KASSERT(eccline->flags & ECC_FLOATING);
    399 
    400 		eccline_blob = eccline->blob;
    401 		memset(eccline, 0, sizeof(struct udf_eccline));
    402 		eccline->flags = ECC_FLOATING;
    403 	} else {
    404 		memset(eccline, 0, sizeof(struct udf_eccline));
    405 		eccline->flags = ECC_FLOATING;
    406 		priv->num_floating++;
    407 	}
    408 
    409 	eccline->queued_on = 0;
    410 	eccline->blob = eccline_blob;
    411 	eccline->buf  = getiobuf(NULL, true);
    412 	eccline->buf->b_private = eccline;	/* IMPORTANT */
    413 
    414 	/* initialise eccline blob */
    415 	memset(eccline->blob, 0, blobsize);
    416 
    417 	eccline->ump = ump;
    418 	eccline->present = eccline->readin = eccline->dirty = 0;
    419 	eccline->error = 0;
    420 	eccline->refcnt = 0;
    421 
    422 	eccline->start_sector    = start_sector;
    423 	eccline->buf->b_lblkno   = start_sector;
    424 	eccline->buf->b_blkno    = start_sector;
    425 	eccline->buf->b_rawblkno = start_sector;
    426 
    427 	LIST_INSERT_HEAD(&priv->eccline_hash[line], eccline, hashchain);
    428 
    429 	/*
    430 	 * TODO possible optimalisation for checking overlap with partitions
    431 	 * to get a clue on future eccline usage
    432 	 */
    433 	eccline->refcnt++;
    434 	UDF_LOCK_ECCLINE(eccline);
    435 
    436 	mutex_exit(&priv->discstrat_mutex);
    437 
    438 	return eccline;
    439 }
    440 
    441 
    442 static void
    443 udf_puteccline(struct udf_eccline *eccline)
    444 {
    445 	struct strat_private *priv = PRIV(eccline->ump);
    446 	struct udf_eccline *deccline;
    447 	struct udf_mount *ump = eccline->ump;
    448 	uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
    449 	int newqueue, tries;
    450 
    451 	mutex_enter(&priv->discstrat_mutex);
    452 
    453 	/* clear directly all readin requests from present ones */
    454 	if (eccline->readin & eccline->present) {
    455 		/* clear all read bits that are already read in */
    456 		eccline->readin &= (~eccline->present) & allbits;
    457 		wakeup(eccline);
    458 	}
    459 
    460 	DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n",
    461 		eccline->start_sector, eccline->refcnt));
    462 
    463 	/* requeue */
    464 	newqueue = UDF_SHED_FREE;
    465 	if (eccline->refcnt > 1)
    466 		newqueue = UDF_SHED_IDLE;
    467 	if (eccline->flags & ECC_WANTED)
    468 		newqueue = UDF_SHED_IDLE;
    469 	if (eccline->dirty) {
    470 		newqueue = UDF_SHED_WRITING;
    471 		if (eccline->flags & ECC_SEQWRITING)
    472 			newqueue = UDF_SHED_SEQWRITING;
    473 	}
    474 
    475 	/* if we have active nodes */
    476 	if (eccline->refcnt > 1) {
    477 		/* we dont set it on seqwriting */
    478 		eccline->flags &= ~ECC_SEQWRITING;
    479 	}
    480 
    481 	/* if we need reading in or not all is yet present, queue reading */
    482 	if ((eccline->readin) || (eccline->present != allbits))
    483 		newqueue = UDF_SHED_READING;
    484 
    485 	/* reduce the number of kept free buffers */
    486 	tries = priv->num_queued[UDF_SHED_FREE] - UDF_ECCLINE_MAXFREE;
    487 	while (tries > 0 /* priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE */) {
    488 		deccline = udf_pop_eccline(priv, UDF_SHED_FREE);
    489 		KASSERT(deccline);
    490 		KASSERT(deccline->refcnt == 0);
    491 		if (deccline->flags & ECC_WANTED) {
    492 			udf_push_eccline(deccline, UDF_SHED_IDLE);
    493 			DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n"));
    494 		} else {
    495 			DPRINTF(ECCLINE, ("Removing entry from free list\n"));
    496 			udf_dispose_eccline(deccline);
    497 		}
    498 		tries--;
    499 	}
    500 
    501 	udf_push_eccline(eccline, newqueue);
    502 
    503 	KASSERT(eccline->refcnt >= 1);
    504 	eccline->refcnt--;
    505 	UDF_UNLOCK_ECCLINE(eccline);
    506 
    507 	mutex_exit(&priv->discstrat_mutex);
    508 }
    509 
    510 /* --------------------------------------------------------------------- */
    511 
    512 static int
    513 udf_create_nodedscr_rmw(struct udf_strat_args *args)
    514 {
    515 	union dscrptr   **dscrptr  = &args->dscr;
    516 	struct udf_mount *ump      = args->ump;
    517 	struct long_ad   *icb      = args->icb;
    518 	struct udf_eccline *eccline;
    519 	uint64_t bit;
    520 	uint32_t sectornr, lb_size, dummy;
    521 	uint8_t *mem;
    522 	int error, eccsect;
    523 
    524 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
    525 	if (error)
    526 		return error;
    527 
    528 	lb_size  = udf_rw32(ump->logical_vol->lb_size);
    529 
    530 	/* get our eccline */
    531 	eccline = udf_geteccline(ump, sectornr, 0);
    532 	eccsect = sectornr - eccline->start_sector;
    533 
    534 	bit = (uint64_t) 1 << eccsect;
    535 	eccline->readin  &= ~bit;	/* just in case */
    536 	eccline->present |=  bit;
    537 	eccline->dirty   &= ~bit;	/* Err... euhm... clean? */
    538 
    539 	eccline->refcnt++;
    540 
    541 	/* clear space */
    542 	mem = ((uint8_t *) eccline->blob) + eccsect * lb_size;
    543 	memset(mem, 0, lb_size);
    544 
    545 	udf_puteccline(eccline);
    546 
    547 	*dscrptr = (union dscrptr *) mem;
    548 	return 0;
    549 }
    550 
    551 
    552 static void
    553 udf_free_nodedscr_rmw(struct udf_strat_args *args)
    554 {
    555 	struct udf_mount *ump  = args->ump;
    556 	struct long_ad   *icb  = args->icb;
    557 	struct udf_eccline *eccline;
    558 	uint64_t bit;
    559 	uint32_t sectornr, dummy;
    560 	int error, eccsect;
    561 
    562 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
    563 	if (error)
    564 		return;
    565 
    566 	/* get our eccline */
    567 	eccline = udf_geteccline(ump, sectornr, 0);
    568 	eccsect = sectornr - eccline->start_sector;
    569 
    570 	bit = (uint64_t) 1 << eccsect;
    571 	eccline->readin &= ~bit;	/* just in case */
    572 
    573 	KASSERT(eccline->refcnt >= 1);
    574 	eccline->refcnt--;
    575 
    576 	udf_puteccline(eccline);
    577 }
    578 
    579 
    580 static int
    581 udf_read_nodedscr_rmw(struct udf_strat_args *args)
    582 {
    583 	union dscrptr   **dscrptr = &args->dscr;
    584 	struct udf_mount *ump = args->ump;
    585 	struct long_ad   *icb = args->icb;
    586 	struct udf_eccline *eccline;
    587 	uint64_t bit;
    588 	uint32_t sectornr, dummy;
    589 	uint8_t *pos;
    590 	int sector_size = ump->discinfo.sector_size;
    591 	int lb_size = udf_rw32(ump->logical_vol->lb_size);
    592 	int i, error, dscrlen, eccsect;
    593 
    594 	lb_size = lb_size;
    595 	KASSERT(sector_size == lb_size);
    596 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
    597 	if (error)
    598 		return error;
    599 
    600 	/* get our eccline */
    601 	eccline = udf_geteccline(ump, sectornr, 0);
    602 	eccsect = sectornr - eccline->start_sector;
    603 
    604 	bit = (uint64_t) 1 << eccsect;
    605 	if ((eccline->present & bit) == 0) {
    606 		/* mark bit for readin */
    607 		eccline->readin |= bit;
    608 		eccline->refcnt++;	/* prevent recycling */
    609 		KASSERT(eccline->bufs[eccsect] == NULL);
    610 		udf_puteccline(eccline);
    611 
    612 		/* wait for completion; XXX remodel to lock bit code */
    613 		error = 0;
    614 		while ((eccline->present & bit) == 0) {
    615 			tsleep(eccline, PRIBIO+1, "udflvdrd", hz/8);
    616 			if (eccline->error & bit) {
    617 				KASSERT(eccline->refcnt >= 1);
    618 				eccline->refcnt--;	/* undo temp refcnt */
    619 				*dscrptr = NULL;
    620 				return EIO;		/* XXX error code */
    621 			}
    622 		}
    623 
    624 		/* reget our line */
    625 		eccline = udf_geteccline(ump, sectornr, 0);
    626 		KASSERT(eccline->refcnt >= 1);
    627 		eccline->refcnt--;	/* undo refcnt */
    628 	}
    629 
    630 	*dscrptr = (union dscrptr *)
    631 		(((uint8_t *) eccline->blob) + eccsect * sector_size);
    632 
    633 	/* code from read_phys_descr */
    634 	/* check if its a valid tag */
    635 	error = udf_check_tag(*dscrptr);
    636 	if (error) {
    637 		/* check if its an empty block */
    638 		pos = (uint8_t *) *dscrptr;
    639 		for (i = 0; i < sector_size; i++, pos++) {
    640 			if (*pos) break;
    641 		}
    642 		if (i == sector_size) {
    643 			/* return no error but with no dscrptr */
    644 			error = 0;
    645 		}
    646 		*dscrptr = NULL;
    647 		udf_puteccline(eccline);
    648 		return error;
    649 	}
    650 
    651 	/* calculate descriptor size */
    652 	dscrlen = udf_tagsize(*dscrptr, sector_size);
    653 	error = udf_check_tag_payload(*dscrptr, dscrlen);
    654 	if (error) {
    655 		*dscrptr = NULL;
    656 		udf_puteccline(eccline);
    657 		return error;
    658 	}
    659 
    660 	eccline->refcnt++;
    661 	udf_puteccline(eccline);
    662 
    663 	return 0;
    664 }
    665 
    666 
    667 static int
    668 udf_write_nodedscr_rmw(struct udf_strat_args *args)
    669 {
    670 	union dscrptr    *dscrptr = args->dscr;
    671 	struct udf_mount *ump = args->ump;
    672 	struct long_ad   *icb = args->icb;
    673 	struct udf_node *udf_node = args->udf_node;
    674 	struct udf_eccline *eccline;
    675 	uint64_t bit;
    676 	uint32_t sectornr, logsectornr, dummy;
    677 	// int waitfor  = args->waitfor;
    678 	int sector_size = ump->discinfo.sector_size;
    679 	int lb_size = udf_rw32(ump->logical_vol->lb_size);
    680 	int error, eccsect;
    681 
    682 	lb_size = lb_size;
    683 	KASSERT(sector_size == lb_size);
    684 	sectornr    = 0;
    685 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
    686 	if (error)
    687 		return error;
    688 
    689 	/* add reference to the vnode to prevent recycling */
    690 	vhold(udf_node->vnode);
    691 
    692 	/* get our eccline */
    693 	eccline = udf_geteccline(ump, sectornr, 0);
    694 	eccsect = sectornr - eccline->start_sector;
    695 
    696 	bit = (uint64_t) 1 << eccsect;
    697 
    698 	/* old callback still pending? */
    699 	if (eccline->bufs[eccsect]) {
    700 		DPRINTF(WRITE, ("udf_write_nodedscr_rmw: writing descriptor"
    701 					" over buffer?\n"));
    702 		nestiobuf_done(eccline->bufs[eccsect],
    703 				eccline->bufs_len[eccsect],
    704 				0);
    705 		eccline->bufs[eccsect] = NULL;
    706 	}
    707 
    708 	/* set sector number in the descriptor and validate */
    709 	dscrptr = (union dscrptr *)
    710 		(((uint8_t *) eccline->blob) + eccsect * sector_size);
    711 	KASSERT(dscrptr == args->dscr);
    712 
    713 	logsectornr = udf_rw32(icb->loc.lb_num);
    714 	dscrptr->tag.tag_loc = udf_rw32(logsectornr);
    715 	udf_validate_tag_and_crc_sums(dscrptr);
    716 
    717 	udf_fixup_node_internals(ump, (uint8_t *) dscrptr, UDF_C_NODE);
    718 
    719 	/* set our flags */
    720 	KASSERT(eccline->present & bit);
    721 	eccline->dirty |= bit;
    722 
    723 	KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size);
    724 
    725 	udf_puteccline(eccline);
    726 
    727 	holdrele(udf_node->vnode);
    728 	udf_node->outstanding_nodedscr--;
    729 	if (udf_node->outstanding_nodedscr == 0) {
    730 		UDF_UNLOCK_NODE(udf_node, udf_node->i_flags & IN_CALLBACK_ULK);
    731 		wakeup(&udf_node->outstanding_nodedscr);
    732 	}
    733 
    734 	/* XXX waitfor not used */
    735 	return 0;
    736 }
    737 
    738 
    739 static void
    740 udf_queuebuf_rmw(struct udf_strat_args *args)
    741 {
    742 	struct udf_mount *ump = args->ump;
    743 	struct buf *buf = args->nestbuf;
    744 	struct desc_tag *tag;
    745 	struct strat_private *priv = PRIV(ump);
    746 	struct udf_eccline *eccline;
    747 	struct long_ad *node_ad_cpy;
    748 	uint64_t bit, *lmapping, *pmapping, *lmappos, *pmappos, blknr;
    749 	uint32_t buf_len, len, sectors, sectornr, our_sectornr;
    750 	uint32_t bpos;
    751 	uint16_t vpart_num;
    752 	uint8_t *fidblk, *src, *dst;
    753 	int sector_size = ump->discinfo.sector_size;
    754 	int blks = sector_size / DEV_BSIZE;
    755 	int eccsect, what, queue, error;
    756 
    757 	KASSERT(ump);
    758 	KASSERT(buf);
    759 	KASSERT(buf->b_iodone == nestiobuf_iodone);
    760 
    761 	blknr        = buf->b_blkno;
    762 	our_sectornr = blknr / blks;
    763 
    764 	what = buf->b_udf_c_type;
    765 	queue = UDF_SHED_READING;
    766 	if ((buf->b_flags & B_READ) == 0) {
    767 		/* writing */
    768 		queue = UDF_SHED_SEQWRITING;
    769 		if (what == UDF_C_DSCR)
    770 			queue = UDF_SHED_WRITING;
    771 		if (what == UDF_C_NODE)
    772 			queue = UDF_SHED_WRITING;
    773 	}
    774 
    775 	if (queue == UDF_SHED_READING) {
    776 		DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw READ %p : sector %d type %d,"
    777 			"b_resid %d, b_bcount %d, b_bufsize %d\n",
    778 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
    779 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
    780 
    781 		/* mark bits for reading */
    782 		buf_len = buf->b_bcount;
    783 		sectornr = our_sectornr;
    784 		eccline = udf_geteccline(ump, sectornr, 0);
    785 		eccsect = sectornr - eccline->start_sector;
    786 		bpos = 0;
    787 		while (buf_len) {
    788 			len = MIN(buf_len, sector_size);
    789 			if (eccsect == ump->packet_size) {
    790 				udf_puteccline(eccline);
    791 				eccline = udf_geteccline(ump, sectornr, 0);
    792 				eccsect = sectornr - eccline->start_sector;
    793 			}
    794 			bit = (uint64_t) 1 << eccsect;
    795 			error = eccline->error & bit ? EIO : 0;
    796 			if (eccline->present & bit) {
    797 				src = (uint8_t *) eccline->blob +
    798 					eccsect * sector_size;
    799 				dst = (uint8_t *) buf->b_data + bpos;
    800 				if (!error)
    801 					memcpy(dst, src, len);
    802 				nestiobuf_done(buf, len, error);
    803 			} else {
    804 				eccline->readin |= bit;
    805 				KASSERT(eccline->bufs[eccsect] == NULL);
    806 				eccline->bufs[eccsect] = buf;
    807 				eccline->bufs_bpos[eccsect] = bpos;
    808 				eccline->bufs_len[eccsect] = len;
    809 			}
    810 			bpos += sector_size;
    811 			eccsect++;
    812 			sectornr++;
    813 			buf_len -= len;
    814 		}
    815 		udf_puteccline(eccline);
    816 		return;
    817 	}
    818 
    819 	if (queue == UDF_SHED_WRITING) {
    820 		DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw WRITE %p : sector %d "
    821 			"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
    822 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
    823 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
    824 		/* if we have FIDs fixup using buffer's sector number(s) */
    825 		if (buf->b_udf_c_type == UDF_C_FIDS) {
    826 			panic("UDF_C_FIDS in SHED_WRITING!\n");
    827 #if 0
    828 			buf_len = buf->b_bcount;
    829 			sectornr = our_sectornr;
    830 			bpos = 0;
    831 			while (buf_len) {
    832 				len = MIN(buf_len, sector_size);
    833 				fidblk = (uint8_t *) buf->b_data + bpos;
    834 				udf_fixup_fid_block(fidblk, sector_size,
    835 					0, len, sectornr);
    836 				sectornr++;
    837 				bpos += len;
    838 				buf_len -= len;
    839 			}
    840 #endif
    841 		}
    842 		udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
    843 
    844 		/* copy parts into the bufs and set for writing */
    845 		buf_len = buf->b_bcount;
    846 		sectornr = our_sectornr;
    847 		eccline = udf_geteccline(ump, sectornr, 0);
    848 		eccsect = sectornr - eccline->start_sector;
    849 		bpos = 0;
    850 		while (buf_len) {
    851 			len = MIN(buf_len, sector_size);
    852 			if (eccsect == ump->packet_size) {
    853 				udf_puteccline(eccline);
    854 				eccline = udf_geteccline(ump, sectornr, 0);
    855 				eccsect = sectornr - eccline->start_sector;
    856 			}
    857 			bit = (uint64_t) 1 << eccsect;
    858 			KASSERT((eccline->readin & bit) == 0);
    859 			eccline->present |= bit;
    860 			eccline->dirty   |= bit;
    861 			if (eccline->bufs[eccsect]) {
    862 				/* old callback still pending */
    863 				nestiobuf_done(eccline->bufs[eccsect],
    864 						eccline->bufs_len[eccsect],
    865 						0);
    866 				eccline->bufs[eccsect] = NULL;
    867 			}
    868 
    869 			src = (uint8_t *) buf->b_data + bpos;
    870 			dst = (uint8_t *) eccline->blob + eccsect * sector_size;
    871 			if (len != sector_size)
    872 				memset(dst, 0, sector_size);
    873 			memcpy(dst, src, len);
    874 
    875 			/* note that its finished for this extent */
    876 			eccline->bufs[eccsect] = NULL;
    877 			nestiobuf_done(buf, len, 0);
    878 
    879 			bpos += sector_size;
    880 			eccsect++;
    881 			sectornr++;
    882 			buf_len -= len;
    883 		}
    884 		udf_puteccline(eccline);
    885 		return;
    886 
    887 	}
    888 
    889 	/* sequential writing */
    890 	KASSERT(queue == UDF_SHED_SEQWRITING);
    891 	DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw SEQWRITE %p : sector XXXX "
    892 		"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
    893 		buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
    894 		buf->b_bufsize));
    895 	/*
    896 	 * Buffers should not have been allocated to disc addresses yet on
    897 	 * this queue. Note that a buffer can get multiple extents allocated.
    898 	 * Note that it *looks* like the normal writing but its different in
    899 	 * the details.
    900 	 *
    901 	 * lmapping contains lb_num relative to base partition.
    902 	 *
    903 	 * XXX should we try to claim/organize the allocated memory to
    904 	 * block-aligned pieces?
    905 	 */
    906 	mutex_enter(&priv->seqwrite_mutex);
    907 
    908 	lmapping    = ump->la_lmapping;
    909 	node_ad_cpy = ump->la_node_ad_cpy;
    910 
    911 	/* logically allocate buf and map it in the file */
    912 	udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
    913 
    914 	/* if we have FIDs, fixup using the new allocation table */
    915 	if (buf->b_udf_c_type == UDF_C_FIDS) {
    916 		buf_len = buf->b_bcount;
    917 		bpos = 0;
    918 		lmappos = lmapping;
    919 		while (buf_len) {
    920 			sectornr = *lmappos++;
    921 			len = MIN(buf_len, sector_size);
    922 			fidblk = (uint8_t *) buf->b_data + bpos;
    923 			udf_fixup_fid_block(fidblk, sector_size,
    924 				0, len, sectornr);
    925 			bpos += len;
    926 			buf_len -= len;
    927 		}
    928 	}
    929 	if (buf->b_udf_c_type == UDF_C_METADATA_SBM) {
    930 		if (buf->b_lblkno == 0) {
    931 			/* update the tag location inside */
    932 			tag = (struct desc_tag *) buf->b_data;
    933 			tag->tag_loc = udf_rw32(*lmapping);
    934 			udf_validate_tag_and_crc_sums(buf->b_data);
    935 		}
    936 	}
    937 	udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
    938 
    939 	/*
    940 	 * Translate new mappings in lmapping to pmappings.
    941 	 * pmapping to contain lb_nums as used for disc adressing.
    942 	 */
    943 	pmapping = ump->la_pmapping;
    944 	sectors  = (buf->b_bcount + sector_size -1) / sector_size;
    945 	udf_translate_vtop_list(ump, sectors, vpart_num, lmapping, pmapping);
    946 
    947 	/* copy parts into the bufs and set for writing */
    948 	pmappos = pmapping;
    949 	buf_len = buf->b_bcount;
    950 	sectornr = *pmappos++;
    951 	eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
    952 	eccsect = sectornr - eccline->start_sector;
    953 	bpos = 0;
    954 	while (buf_len) {
    955 		len = MIN(buf_len, sector_size);
    956 		eccsect = sectornr - eccline->start_sector;
    957 		if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
    958 			eccline->flags |= ECC_SEQWRITING;
    959 			udf_puteccline(eccline);
    960 			eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
    961 			eccsect = sectornr - eccline->start_sector;
    962 		}
    963 		bit = (uint64_t) 1 << eccsect;
    964 		KASSERT((eccline->readin & bit) == 0);
    965 		eccline->present |= bit;
    966 		eccline->dirty   |= bit;
    967 		eccline->bufs[eccsect] = NULL;
    968 
    969 		src = (uint8_t *) buf->b_data + bpos;
    970 		dst = (uint8_t *)
    971 			eccline->blob + eccsect * sector_size;
    972 		if (len != sector_size)
    973 			memset(dst, 0, sector_size);
    974 		memcpy(dst, src, len);
    975 
    976 		/* note that its finished for this extent */
    977 		nestiobuf_done(buf, len, 0);
    978 
    979 		bpos += sector_size;
    980 		sectornr = *pmappos++;
    981 		buf_len -= len;
    982 	}
    983 	eccline->flags |= ECC_SEQWRITING;
    984 	udf_puteccline(eccline);
    985 	mutex_exit(&priv->seqwrite_mutex);
    986 }
    987 
    988 /* --------------------------------------------------------------------- */
    989 
    990 static void
    991 udf_shedule_read_callback(struct buf *buf)
    992 {
    993 	struct udf_eccline *eccline = BTOE(buf);
    994 	struct udf_mount *ump = eccline->ump;
    995 	uint64_t bit;
    996 	uint8_t *src, *dst;
    997 	int sector_size = ump->discinfo.sector_size;
    998 	int error, i, len;
    999 
   1000 	DPRINTF(ECCLINE, ("read callback called\n"));
   1001 	/* post process read action */
   1002 	error = buf->b_error;
   1003 	for (i = 0; i < ump->packet_size; i++) {
   1004 		bit = (uint64_t) 1 << i;
   1005 		src = (uint8_t *) buf->b_data +   i * sector_size;
   1006 		dst = (uint8_t *) eccline->blob + i * sector_size;
   1007 		if (eccline->present & bit)
   1008 			continue;
   1009 		eccline->present |= bit;
   1010 		if (error)
   1011 			eccline->error |= bit;
   1012 		if (eccline->bufs[i]) {
   1013 			dst = (uint8_t *) eccline->bufs[i]->b_data +
   1014 				eccline->bufs_bpos[i];
   1015 			len = eccline->bufs_len[i];
   1016 			if (!error)
   1017 				memcpy(dst, src, len);
   1018 			nestiobuf_done(eccline->bufs[i], len, error);
   1019 			eccline->bufs[i] = NULL;
   1020 		}
   1021 
   1022 	}
   1023 	KASSERT(buf->b_data == eccline->blob);
   1024 	KASSERT(eccline->present == ((uint64_t) 1 << ump->packet_size)-1);
   1025 
   1026 	/*
   1027 	 * XXX TODO what to do on read errors? read in all sectors
   1028 	 * synchronously and allocate a sparable entry?
   1029 	 */
   1030 
   1031 	wakeup(eccline);
   1032 	udf_puteccline(eccline);
   1033 	DPRINTF(ECCLINE, ("read callback finished\n"));
   1034 }
   1035 
   1036 
   1037 static void
   1038 udf_shedule_write_callback(struct buf *buf)
   1039 {
   1040 	struct udf_eccline *eccline = BTOE(buf);
   1041 	struct udf_mount *ump = eccline->ump;
   1042 	uint64_t bit;
   1043 	int error, i, len;
   1044 
   1045 	DPRINTF(ECCLINE, ("write callback called\n"));
   1046 	/* post process write action */
   1047 	error = buf->b_error;
   1048 	for (i = 0; i < ump->packet_size; i++) {
   1049 		bit = (uint64_t) 1 << i;
   1050 		if ((eccline->dirty & bit) == 0)
   1051 			continue;
   1052 		if (error) {
   1053 			eccline->error |= bit;
   1054 		} else {
   1055 			eccline->dirty &= ~bit;
   1056 		}
   1057 		if (eccline->bufs[i]) {
   1058 			len = eccline->bufs_len[i];
   1059 			nestiobuf_done(eccline->bufs[i], len, error);
   1060 			eccline->bufs[i] = NULL;
   1061 		}
   1062 	}
   1063 	KASSERT(eccline->dirty == 0);
   1064 
   1065 	KASSERT(error == 0);
   1066 	/*
   1067 	 * XXX TODO on write errors allocate a sparable entry
   1068 	 */
   1069 
   1070 	wakeup(eccline);
   1071 	udf_puteccline(eccline);
   1072 }
   1073 
   1074 
   1075 static void
   1076 udf_issue_eccline(struct udf_eccline *eccline, int queued_on)
   1077 {
   1078 	struct udf_mount *ump = eccline->ump;
   1079 	struct strat_private *priv = PRIV(ump);
   1080 	struct buf *buf, *nestbuf;
   1081 	uint64_t bit, allbits = ((uint64_t) 1 << ump->packet_size)-1;
   1082 	uint32_t start;
   1083 	int sector_size = ump->discinfo.sector_size;
   1084 	int blks = sector_size / DEV_BSIZE;
   1085 	int i;
   1086 
   1087 	if (queued_on == UDF_SHED_READING) {
   1088 		DPRINTF(SHEDULE, ("udf_issue_eccline reading : "));
   1089 		/* read all bits that are not yet present */
   1090 		eccline->readin = (~eccline->present) & allbits;
   1091 		KASSERT(eccline->readin);
   1092 		start = eccline->start_sector;
   1093 		buf = eccline->buf;
   1094 		buf_init(buf);
   1095 		buf->b_flags    = B_READ | B_ASYNC;
   1096 		SET(buf->b_cflags, BC_BUSY);	/* mark buffer busy */
   1097 		buf->b_oflags   = 0;
   1098 		buf->b_iodone   = udf_shedule_read_callback;
   1099 		buf->b_data     = eccline->blob;
   1100 		buf->b_bcount   = ump->packet_size * sector_size;
   1101 		buf->b_resid    = buf->b_bcount;
   1102 		buf->b_bufsize  = buf->b_bcount;
   1103 		buf->b_private  = eccline;
   1104 		BIO_SETPRIO(buf, BPRIO_DEFAULT);
   1105 		buf->b_lblkno   = buf->b_blkno = buf->b_rawblkno = start * blks;
   1106 		buf->b_proc     = NULL;
   1107 
   1108 		if (eccline->present != 0) {
   1109 			for (i = 0; i < ump->packet_size; i++) {
   1110 				bit = (uint64_t) 1 << i;
   1111 				if (eccline->present & bit) {
   1112 					nestiobuf_done(buf, sector_size, 0);
   1113 					continue;
   1114 				}
   1115 				nestbuf = getiobuf(NULL, true);
   1116 				nestiobuf_setup(buf, nestbuf, i * sector_size,
   1117 					sector_size);
   1118 				/* adjust blocknumber to read */
   1119 				nestbuf->b_blkno = buf->b_blkno + i*blks;
   1120 				nestbuf->b_rawblkno = buf->b_rawblkno + i*blks;
   1121 
   1122 				DPRINTF(SHEDULE, ("sector %d ",
   1123 					start + i));
   1124 				/* call asynchronous */
   1125 				VOP_STRATEGY(ump->devvp, nestbuf);
   1126 			}
   1127 			DPRINTF(SHEDULE, ("\n"));
   1128 			return;
   1129 		}
   1130 	} else {
   1131 		/* write or seqwrite */
   1132 		DPRINTF(SHEDULE, ("udf_issue_eccline writing or seqwriting : "));
   1133 		if (eccline->present != allbits) {
   1134 			/* requeue to read-only */
   1135 			DPRINTF(SHEDULE, ("\n\t-> not complete, requeue to "
   1136 				"reading\n"));
   1137 			udf_push_eccline(eccline, UDF_SHED_READING);
   1138 			return;
   1139 		}
   1140 		start = eccline->start_sector;
   1141 		buf = eccline->buf;
   1142 		buf_init(buf);
   1143 		buf->b_flags    = B_WRITE | B_ASYNC;
   1144 		SET(buf->b_cflags, BC_BUSY);	/* mark buffer busy */
   1145 		buf->b_oflags   = 0;
   1146 		buf->b_iodone   = udf_shedule_write_callback;
   1147 		buf->b_data     = eccline->blob;
   1148 		buf->b_bcount   = ump->packet_size * sector_size;
   1149 		buf->b_resid    = buf->b_bcount;
   1150 		buf->b_bufsize  = buf->b_bcount;
   1151 		buf->b_private  = eccline;
   1152 		BIO_SETPRIO(buf, BPRIO_DEFAULT);
   1153 		buf->b_lblkno   = buf->b_blkno = buf->b_rawblkno = start * blks;
   1154 		buf->b_proc     = NULL;
   1155 	}
   1156 
   1157 	mutex_exit(&priv->discstrat_mutex);
   1158 		/* call asynchronous */
   1159 		DPRINTF(SHEDULE, ("sector %d for %d\n",
   1160 			start, ump->packet_size));
   1161 		VOP_STRATEGY(ump->devvp, buf);
   1162 	mutex_enter(&priv->discstrat_mutex);
   1163 }
   1164 
   1165 
   1166 static void
   1167 udf_discstrat_thread(void *arg)
   1168 {
   1169 	struct udf_mount *ump = (struct udf_mount *) arg;
   1170 	struct strat_private *priv = PRIV(ump);
   1171 	struct udf_eccline *eccline;
   1172 	struct timespec now, *last;
   1173 	int new_queue, wait, work;
   1174 
   1175 	work = 1;
   1176 	mutex_enter(&priv->discstrat_mutex);
   1177 	priv->num_floating = 0;
   1178 	while (priv->run_thread || work || priv->num_floating) {
   1179 		/* process the current selected queue */
   1180 		/* maintenance: free exess ecclines */
   1181 		while (priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE) {
   1182 			eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
   1183 			KASSERT(eccline);
   1184 			KASSERT(eccline->refcnt == 0);
   1185 			DPRINTF(ECCLINE, ("Removing entry from free list\n"));
   1186 			udf_dispose_eccline(eccline);
   1187 		}
   1188 
   1189 		/* get our time */
   1190 		vfs_timestamp(&now);
   1191 		last = &priv->last_queued[priv->cur_queue];
   1192 
   1193 		/* don't shedule too quickly when there is only one */
   1194 		if (priv->cur_queue == UDF_SHED_WRITING) {
   1195 			if (priv->num_queued[priv->cur_queue] <= 2) {
   1196 				if (now.tv_sec - last->tv_sec < 4) {
   1197 					/* wait some time */
   1198 					cv_timedwait(&priv->discstrat_cv,
   1199 						&priv->discstrat_mutex, hz);
   1200 					continue;
   1201 				}
   1202 			}
   1203 		}
   1204 
   1205 		/* get our line */
   1206 		eccline = udf_pop_eccline(priv, priv->cur_queue);
   1207 		if (eccline) {
   1208 			wait = 0;
   1209 			new_queue = priv->cur_queue;
   1210 			DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n"));
   1211 
   1212 			/* complete the `get' by locking and refcounting it */
   1213 			UDF_LOCK_ECCLINE(eccline);
   1214 			eccline->refcnt++;
   1215 
   1216 			udf_issue_eccline(eccline, priv->cur_queue);
   1217 		} else {
   1218 			wait = 1;
   1219 			/* check if we can/should switch */
   1220 			new_queue = priv->cur_queue;
   1221 			if (BUFQ_PEEK(priv->queues[UDF_SHED_READING]))
   1222 				new_queue = UDF_SHED_READING;
   1223 			if (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]))
   1224 				new_queue = UDF_SHED_WRITING;
   1225 			if (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]))
   1226 				new_queue = UDF_SHED_SEQWRITING;
   1227 
   1228 			/* dont switch seqwriting too fast */
   1229 			if (priv->cur_queue == UDF_SHED_READING) {
   1230 				if (now.tv_sec - last->tv_sec < 1)
   1231 					new_queue = priv->cur_queue;
   1232 			}
   1233 			if (priv->cur_queue == UDF_SHED_WRITING) {
   1234 				if (now.tv_sec - last->tv_sec < 2)
   1235 					new_queue = priv->cur_queue;
   1236 			}
   1237 			if (priv->cur_queue == UDF_SHED_SEQWRITING) {
   1238 				if (now.tv_sec - last->tv_sec < 4)
   1239 					new_queue = priv->cur_queue;
   1240 			}
   1241 		}
   1242 
   1243 		/* give room */
   1244 		mutex_exit(&priv->discstrat_mutex);
   1245 
   1246 		if (new_queue != priv->cur_queue) {
   1247 			wait = 0;
   1248 			DPRINTF(SHEDULE, ("switching from %d to %d\n",
   1249 				priv->cur_queue, new_queue));
   1250 			priv->cur_queue = new_queue;
   1251 		}
   1252 		mutex_enter(&priv->discstrat_mutex);
   1253 
   1254 		/* wait for more if needed */
   1255 		if (wait)
   1256 			cv_timedwait(&priv->discstrat_cv,
   1257 				&priv->discstrat_mutex, hz);	/* /8 */
   1258 
   1259 		work  = (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL);
   1260 		work |= (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL);
   1261 		work |= (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL);
   1262 
   1263 		DPRINTF(PARANOIA, ("work : (%d, %d, %d) -> work %d, float %d\n",
   1264 			(BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL),
   1265 			(BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL),
   1266 			(BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL),
   1267 			work, priv->num_floating));
   1268 	}
   1269 
   1270 	mutex_exit(&priv->discstrat_mutex);
   1271 
   1272 	/* tear down remaining ecclines */
   1273 	mutex_enter(&priv->discstrat_mutex);
   1274 	KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0);
   1275 	KASSERT(priv->num_queued[UDF_SHED_READING] == 0);
   1276 	KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0);
   1277 	KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0);
   1278 
   1279 	KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_IDLE]) == NULL);
   1280 	KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_READING]) == NULL);
   1281 	KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) == NULL);
   1282 	KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
   1283 	eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
   1284 	while (eccline) {
   1285 		udf_dispose_eccline(eccline);
   1286 		eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
   1287 	}
   1288 	KASSERT(priv->num_queued[UDF_SHED_FREE] == 0);
   1289 	mutex_exit(&priv->discstrat_mutex);
   1290 
   1291 	priv->thread_finished = 1;
   1292 	wakeup(&priv->run_thread);
   1293 	kthread_exit(0);
   1294 	/* not reached */
   1295 }
   1296 
   1297 /* --------------------------------------------------------------------- */
   1298 
   1299 /*
   1300  * Buffer memory pool allocator.
   1301  */
   1302 
   1303 static void *
   1304 ecclinepool_page_alloc(struct pool *pp, int flags)
   1305 {
   1306         return (void *)uvm_km_alloc(kernel_map,
   1307             MAXBSIZE, MAXBSIZE,
   1308             ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
   1309 	    	| UVM_KMF_WIRED /* UVM_KMF_PAGABLE? */);
   1310 }
   1311 
   1312 static void
   1313 ecclinepool_page_free(struct pool *pp, void *v)
   1314 {
   1315         uvm_km_free(kernel_map, (vaddr_t)v, MAXBSIZE, UVM_KMF_WIRED);
   1316 }
   1317 
   1318 static struct pool_allocator ecclinepool_allocator = {
   1319         .pa_alloc = ecclinepool_page_alloc,
   1320         .pa_free  = ecclinepool_page_free,
   1321         .pa_pagesz = MAXBSIZE,
   1322 };
   1323 
   1324 
   1325 static void
   1326 udf_discstrat_init_rmw(struct udf_strat_args *args)
   1327 {
   1328 	struct udf_mount *ump = args->ump;
   1329 	struct strat_private *priv = PRIV(ump);
   1330 	uint32_t lb_size, blobsize, hashline;
   1331 	int i;
   1332 
   1333 	KASSERT(ump);
   1334 	KASSERT(ump->logical_vol);
   1335 	KASSERT(priv == NULL);
   1336 
   1337 	lb_size = udf_rw32(ump->logical_vol->lb_size);
   1338 	blobsize = ump->packet_size * lb_size;
   1339 	KASSERT(lb_size > 0);
   1340 	KASSERT(ump->packet_size <= 64);
   1341 
   1342 	/* initialise our memory space */
   1343 	ump->strategy_private = malloc(sizeof(struct strat_private),
   1344 		M_UDFTEMP, M_WAITOK);
   1345 	priv = ump->strategy_private;
   1346 	memset(priv, 0 , sizeof(struct strat_private));
   1347 
   1348 	/* initialise locks */
   1349 	cv_init(&priv->discstrat_cv, "udfstrat");
   1350 	mutex_init(&priv->discstrat_mutex, MUTEX_DRIVER, IPL_BIO);
   1351 	mutex_init(&priv->seqwrite_mutex, MUTEX_DEFAULT, IPL_NONE);
   1352 
   1353 	/* initialise struct eccline pool */
   1354 	pool_init(&priv->eccline_pool, sizeof(struct udf_eccline),
   1355 		0, 0, 0, "udf_eccline_pool", NULL, IPL_NONE);
   1356 
   1357 	/* initialise eccline blob pool */
   1358 	pool_init(&priv->ecclineblob_pool, blobsize,
   1359 		0,0,0, "udf_eccline_blob", &ecclinepool_allocator, IPL_NONE);
   1360 
   1361 	/* initialise main queues */
   1362 	for (i = 0; i < UDF_SHED_MAX; i++) {
   1363 		priv->num_queued[i] = 0;
   1364 		vfs_timestamp(&priv->last_queued[i]);
   1365 	}
   1366 	bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
   1367 		BUFQ_SORT_RAWBLOCK);
   1368 	bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
   1369 		BUFQ_SORT_RAWBLOCK);
   1370 	bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "disksort", 0);
   1371 
   1372 	/* initialise administrative queues */
   1373 	bufq_alloc(&priv->queues[UDF_SHED_IDLE], "fcfs", 0);
   1374 	bufq_alloc(&priv->queues[UDF_SHED_FREE], "fcfs", 0);
   1375 
   1376 	for (hashline = 0; hashline < UDF_ECCBUF_HASHSIZE; hashline++) {
   1377 		LIST_INIT(&priv->eccline_hash[hashline]);
   1378 	}
   1379 
   1380 	/* create our disk strategy thread */
   1381 	priv->cur_queue = UDF_SHED_READING;
   1382 	priv->thread_finished = 0;
   1383 	priv->run_thread      = 1;
   1384 	if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
   1385 		udf_discstrat_thread, ump, &priv->queue_lwp,
   1386 		"%s", "udf_rw")) {
   1387 		panic("fork udf_rw");
   1388 	}
   1389 }
   1390 
   1391 
   1392 static void
   1393 udf_discstrat_finish_rmw(struct udf_strat_args *args)
   1394 {
   1395 	struct udf_mount *ump = args->ump;
   1396 	struct strat_private *priv = PRIV(ump);
   1397 	int error;
   1398 
   1399 	if (ump == NULL)
   1400 		return;
   1401 
   1402 	/* stop our sheduling thread */
   1403 	KASSERT(priv->run_thread == 1);
   1404 	priv->run_thread = 0;
   1405 	wakeup(priv->queue_lwp);
   1406 	while (!priv->thread_finished) {
   1407 		error = tsleep(&priv->run_thread, PRIBIO+1,
   1408 			"udfshedfin", hz);
   1409 	}
   1410 	/* kthread should be finished now */
   1411 
   1412 	/* cleanup our pools */
   1413 	pool_destroy(&priv->eccline_pool);
   1414 	pool_destroy(&priv->ecclineblob_pool);
   1415 
   1416 	cv_destroy(&priv->discstrat_cv);
   1417 	mutex_destroy(&priv->discstrat_mutex);
   1418 	mutex_destroy(&priv->seqwrite_mutex);
   1419 
   1420 	/* free our private space */
   1421 	free(ump->strategy_private, M_UDFTEMP);
   1422 	ump->strategy_private = NULL;
   1423 }
   1424 
   1425 /* --------------------------------------------------------------------- */
   1426 
   1427 struct udf_strategy udf_strat_rmw =
   1428 {
   1429 	udf_create_nodedscr_rmw,
   1430 	udf_free_nodedscr_rmw,
   1431 	udf_read_nodedscr_rmw,
   1432 	udf_write_nodedscr_rmw,
   1433 	udf_queuebuf_rmw,
   1434 	udf_discstrat_init_rmw,
   1435 	udf_discstrat_finish_rmw
   1436 };
   1437 
   1438