Home | History | Annotate | Line # | Download | only in udf
udf_strat_rmw.c revision 1.3
      1 /* $NetBSD: udf_strat_rmw.c,v 1.3 2008/05/15 14:22:40 reinoud Exp $ */
      2 
      3 /*
      4  * Copyright (c) 2006, 2008 Reinoud Zandijk
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  *
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #ifndef lint
     31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.3 2008/05/15 14:22:40 reinoud Exp $");
     32 #endif /* not lint */
     33 
     34 
     35 #if defined(_KERNEL_OPT)
     36 #include "opt_quota.h"
     37 #include "opt_compat_netbsd.h"
     38 #endif
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/sysctl.h>
     43 #include <sys/namei.h>
     44 #include <sys/proc.h>
     45 #include <sys/kernel.h>
     46 #include <sys/vnode.h>
     47 #include <miscfs/genfs/genfs_node.h>
     48 #include <sys/mount.h>
     49 #include <sys/buf.h>
     50 #include <sys/file.h>
     51 #include <sys/device.h>
     52 #include <sys/disklabel.h>
     53 #include <sys/ioctl.h>
     54 #include <sys/malloc.h>
     55 #include <sys/dirent.h>
     56 #include <sys/stat.h>
     57 #include <sys/conf.h>
     58 #include <sys/kauth.h>
     59 #include <sys/kthread.h>
     60 #include <dev/clock_subr.h>
     61 
     62 #include <fs/udf/ecma167-udf.h>
     63 #include <fs/udf/udf_mount.h>
     64 
     65 #if defined(_KERNEL_OPT)
     66 #include "opt_udf.h"
     67 #endif
     68 
     69 #include "udf.h"
     70 #include "udf_subr.h"
     71 #include "udf_bswap.h"
     72 
     73 
/*
 * Accessor shorthands; each one is a plain cast of an opaque pointer field.
 *
 *   VTOI: the v_data pointer of a vnode, as a struct udf_node
 *   PRIV: the strategy_private pointer of a mount, as this strategy's state
 *   BTOE: the b_private pointer of a buf, as the eccline it was set to
 */
#define VTOI(vnode)	((struct udf_node *) (vnode)->v_data)
#define PRIV(ump)	((struct strat_private *) (ump)->strategy_private)
#define BTOE(buf)	((struct udf_eccline *) ((buf)->b_private))
     77 
     78 /* --------------------------------------------------------------------- */
     79 
/*
 * Upper bound on the number of sectors in one eccline.  The per-sector state
 * of a line is kept in 64-bit bitmaps and 64-entry arrays.
 */
#define UDF_MAX_PACKET_SIZE	64			/* DONT change this */

/*
 * scheduler states; these double as indices into the per-queue arrays.
 * Index 0 is not a queue: a queued_on value of 0 means "not on any queue".
 */
#define UDF_SHED_MAX		6			/* array size       */
#define UDF_SHED_READING	1
#define UDF_SHED_WRITING	2
#define UDF_SHED_SEQWRITING	3
#define UDF_SHED_IDLE		4			/* resting          */
#define UDF_SHED_FREE		5			/* recycleable      */

/* eccline flags */
#define ECC_LOCKED		0x01			/* prevent access   */
#define ECC_WANTED		0x02			/* trying access    */
#define ECC_SEQWRITING		0x04			/* sequential queue */
#define ECC_FLOATING		0x08			/* not queued yet   */
     95 
     96 
     97 TAILQ_HEAD(ecclineq, udf_eccline);
     98 struct udf_eccline {
     99 	struct udf_mount	 *ump;
    100 	uint64_t		  present;		/* preserve these */
    101 	uint64_t		  readin;		/* bitmap */
    102 	uint64_t		  dirty;		/* bitmap */
    103 	uint64_t		  error;		/* bitmap */
    104 	uint32_t		  refcnt;
    105 
    106 	uint32_t		  flags;
    107 	uint32_t		  start_sector;		/* physical */
    108 
    109 	struct buf		 *buf;
    110 	void			 *blob;
    111 
    112 	struct buf		 *bufs[UDF_MAX_PACKET_SIZE];
    113 	uint32_t		  bufs_bpos[UDF_MAX_PACKET_SIZE];
    114 	int			  bufs_len[UDF_MAX_PACKET_SIZE];
    115 
    116 	int			  queued_on;		/* on which BUFQ list */
    117 	LIST_ENTRY(udf_eccline)   hashchain;		/* on sector lookup  */
    118 };
    119 
    120 
    121 struct strat_private {
    122 	lwp_t			 *queue_lwp;
    123 	kcondvar_t		  discstrat_cv;		/* to wait on       */
    124 	kmutex_t		  discstrat_mutex;	/* disc strategy    */
    125 	kmutex_t		  seqwrite_mutex;	/* protect mappings */
    126 
    127 	int			  run_thread;		/* thread control */
    128 	int			  thread_finished;	/* thread control */
    129 	int			  cur_queue;
    130 
    131 	int			  num_floating;
    132 	int			  num_queued[UDF_SHED_MAX];
    133 	struct bufq_state	 *queues[UDF_SHED_MAX];
    134 	struct timespec		  last_queued[UDF_SHED_MAX];
    135 	struct disk_strategy	  old_strategy_setting;
    136 
    137 	struct pool		  eccline_pool;
    138 	struct pool		  ecclineblob_pool;
    139 	LIST_HEAD(, udf_eccline)  eccline_hash[UDF_ECCBUF_HASHSIZE];
    140 };
    141 
    142 /* --------------------------------------------------------------------- */
    143 
/* thin wrappers around the eccline lock and unlock functions */
#define UDF_LOCK_ECCLINE(eccline)	udf_lock_eccline(eccline)
#define UDF_UNLOCK_ECCLINE(eccline)	udf_unlock_eccline(eccline)
    146 
    147 /* can be called with or without discstrat lock */
    148 static void
    149 udf_lock_eccline(struct udf_eccline *eccline)
    150 {
    151 	struct strat_private *priv = PRIV(eccline->ump);
    152 	int waslocked, ret;
    153 
    154 	waslocked = mutex_owned(&priv->discstrat_mutex);
    155 	if (!waslocked)
    156 		mutex_enter(&priv->discstrat_mutex);
    157 
    158 	/* wait until its unlocked first */
    159 	while (eccline->flags & ECC_LOCKED) {
    160 		eccline->flags |= ECC_WANTED;
    161 		ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex,
    162 			hz/8);
    163 		if (ret == EWOULDBLOCK)
    164 			DPRINTF(LOCKING, ("eccline lock helt, waiting for "
    165 				"release"));
    166 	}
    167 	eccline->flags |= ECC_LOCKED;
    168 	eccline->flags &= ~ECC_WANTED;
    169 
    170 	if (!waslocked)
    171 		mutex_exit(&priv->discstrat_mutex);
    172 }
    173 
    174 
    175 /* can be called with or without discstrat lock */
    176 static void
    177 udf_unlock_eccline(struct udf_eccline *eccline)
    178 {
    179 	struct strat_private *priv = PRIV(eccline->ump);
    180 	int waslocked;
    181 
    182 	KASSERT(mutex_owned(&priv->discstrat_mutex));
    183 
    184 	waslocked = mutex_owned(&priv->discstrat_mutex);
    185 	if (!waslocked)
    186 		mutex_enter(&priv->discstrat_mutex);
    187 
    188 	eccline->flags &= ~ECC_LOCKED;
    189 	cv_broadcast(&priv->discstrat_cv);
    190 
    191 	if (!waslocked)
    192 		mutex_exit(&priv->discstrat_mutex);
    193 }
    194 
    195 
    196 /* NOTE discstrat_mutex should be held! */
    197 static void
    198 udf_dispose_eccline(struct udf_eccline *eccline)
    199 {
    200 	struct strat_private *priv = PRIV(eccline->ump);
    201 	struct buf *ret;
    202 
    203 	KASSERT(mutex_owned(&priv->discstrat_mutex));
    204 
    205 	KASSERT(eccline->refcnt == 0);
    206 	KASSERT(eccline->dirty  == 0);
    207 
    208 	DPRINTF(ECCLINE, ("dispose eccline with start sector %d, "
    209 		"present %0"PRIx64"\n", eccline->start_sector,
    210 		eccline->present));
    211 
    212 	if (eccline->queued_on) {
    213 		ret = BUFQ_CANCEL(priv->queues[eccline->queued_on], eccline->buf);
    214 		KASSERT(ret == eccline->buf);
    215 		priv->num_queued[eccline->queued_on]--;
    216 	}
    217 	LIST_REMOVE(eccline, hashchain);
    218 
    219 	if (eccline->flags & ECC_FLOATING) {
    220 		eccline->flags &= ~ECC_FLOATING;
    221 		priv->num_floating--;
    222 	}
    223 
    224 	putiobuf(eccline->buf);
    225 	pool_put(&priv->ecclineblob_pool, eccline->blob);
    226 	pool_put(&priv->eccline_pool, eccline);
    227 }
    228 
    229 
    230 /* NOTE discstrat_mutex should be held! */
    231 static void
    232 udf_push_eccline(struct udf_eccline *eccline, int newqueue)
    233 {
    234 	struct strat_private *priv = PRIV(eccline->ump);
    235 	struct buf *ret;
    236 	int curqueue;
    237 
    238 	KASSERT(mutex_owned(&priv->discstrat_mutex));
    239 
    240 	DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue));
    241 
    242 	/* requeue */
    243 	curqueue = eccline->queued_on;
    244 	if (curqueue) {
    245 		ret = BUFQ_CANCEL(priv->queues[curqueue], eccline->buf);
    246 
    247 		DPRINTF(PARANOIA, ("push_eccline BUFQ_CANCEL returned %p when "
    248 			"requested to remove %p from queue %d\n", ret,
    249 			eccline->buf, curqueue));
    250 #ifdef DIAGNOSTIC
    251 		if (ret == NULL) {
    252 			int i;
    253 
    254 			printf("udf_push_eccline: bufq_cancel can't find "
    255 				"buffer; dumping queues\n");
    256 			for (i = 1; i < UDF_SHED_MAX; i++) {
    257 				printf("queue %d\n\t", i);
    258 				ret = BUFQ_GET(priv->queues[i]);
    259 				while (ret) {
    260 					printf("%p ", ret);
    261 					if (ret == eccline->buf)
    262 						printf("[<-] ");
    263 					ret = BUFQ_GET(priv->queues[i]);
    264 				}
    265 				printf("\n");
    266 			}
    267 			panic("fatal queue bug; exit");
    268 		}
    269 #endif
    270 
    271 		KASSERT(ret == eccline->buf);
    272 		priv->num_queued[curqueue]--;
    273 	}
    274 
    275 	BUFQ_PUT(priv->queues[newqueue], eccline->buf);
    276 	eccline->queued_on = newqueue;
    277 	priv->num_queued[newqueue]++;
    278 	vfs_timestamp(&priv->last_queued[newqueue]);
    279 
    280 	if (eccline->flags & ECC_FLOATING) {
    281 		eccline->flags &= ~ECC_FLOATING;
    282 		priv->num_floating--;
    283 	}
    284 
    285 	if ((newqueue != UDF_SHED_FREE) && (newqueue != UDF_SHED_IDLE))
    286 		cv_signal(&priv->discstrat_cv);
    287 }
    288 
    289 
    290 static struct udf_eccline *
    291 udf_pop_eccline(struct strat_private *priv, int queued_on)
    292 {
    293 	struct udf_eccline *eccline;
    294 	struct buf *buf;
    295 
    296 	KASSERT(mutex_owned(&priv->discstrat_mutex));
    297 
    298 	buf = BUFQ_GET(priv->queues[queued_on]);
    299 	if (!buf) {
    300 		KASSERT(priv->num_queued[queued_on] == 0);
    301 		return NULL;
    302 	}
    303 
    304 	eccline = BTOE(buf);
    305 	KASSERT(eccline->queued_on == queued_on);
    306 	eccline->queued_on = 0;
    307 	priv->num_queued[queued_on]--;
    308 
    309 	if (eccline->flags & ECC_FLOATING)
    310 		panic("popping already marked floating eccline");
    311 	eccline->flags |= ECC_FLOATING;
    312 	priv->num_floating++;
    313 
    314 	DPRINTF(PARANOIA, ("DEBUG: buf %p popped from queue %d\n",
    315 		eccline->buf, queued_on));
    316 
    317 	return eccline;
    318 }
    319 
    320 
    321 static struct udf_eccline *
    322 udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags)
    323 {
    324 	struct strat_private *priv = PRIV(ump);
    325 	struct udf_eccline *eccline;
    326 	uint32_t start_sector, lb_size, blobsize;
    327 	uint8_t *eccline_blob;
    328 	int line, line_offset;
    329 	int num_busy, ret;
    330 
    331 	line_offset  = sector % ump->packet_size;
    332 	start_sector = sector - line_offset;
    333 	line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK;
    334 
    335 	mutex_enter(&priv->discstrat_mutex);
    336 
    337 retry:
    338 	DPRINTF(ECCLINE, ("get line sector %d, line %d\n", sector, line));
    339 	LIST_FOREACH(eccline, &priv->eccline_hash[line], hashchain) {
    340 		if (eccline->start_sector == start_sector) {
    341 			DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n",
    342 				eccline->start_sector));
    343 
    344 			UDF_LOCK_ECCLINE(eccline);
    345 			/* move from freelist (!) */
    346 			if (eccline->queued_on == UDF_SHED_FREE) {
    347 				DPRINTF(ECCLINE, ("was on freelist\n"));
    348 				KASSERT(eccline->refcnt == 0);
    349 				udf_push_eccline(eccline, UDF_SHED_IDLE);
    350 			}
    351 			eccline->refcnt++;
    352 			mutex_exit(&priv->discstrat_mutex);
    353 			return eccline;
    354 		}
    355 	}
    356 
    357 	DPRINTF(ECCLINE, ("\tnot found in eccline cache\n"));
    358 	/* not found in eccline cache */
    359 
    360 	lb_size  = udf_rw32(ump->logical_vol->lb_size);
    361 	blobsize = ump->packet_size * lb_size;
    362 
    363 	/* dont allow too many pending requests */
    364 	DPRINTF(ECCLINE, ("\tallocating new eccline\n"));
    365 	num_busy = (priv->num_queued[UDF_SHED_SEQWRITING] + priv->num_floating);
    366 	if ((flags & ECC_SEQWRITING) && (num_busy > UDF_ECCLINE_MAXBUSY)) {
    367 		ret = cv_timedwait(&priv->discstrat_cv,
    368 			&priv->discstrat_mutex, hz/8);
    369 		goto retry;
    370 	}
    371 
    372 	eccline_blob = pool_get(&priv->ecclineblob_pool, PR_NOWAIT);
    373 	eccline = pool_get(&priv->eccline_pool, PR_NOWAIT);
    374 	if ((eccline_blob == NULL) || (eccline == NULL)) {
    375 		if (eccline_blob)
    376 			pool_put(&priv->ecclineblob_pool, eccline_blob);
    377 		if (eccline)
    378 			pool_put(&priv->eccline_pool, eccline);
    379 
    380 		/* out of memory for now; canibalise freelist */
    381 		eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
    382 		if (eccline == NULL) {
    383 			/* serious trouble; wait and retry */
    384 			cv_timedwait(&priv->discstrat_cv,
    385 				&priv->discstrat_mutex, hz/8);
    386 			goto retry;
    387 		}
    388 		/* push back line if we're waiting for it */
    389 		if (eccline->flags & ECC_WANTED) {
    390 			udf_push_eccline(eccline, UDF_SHED_IDLE);
    391 			goto retry;
    392 		}
    393 
    394 		/* unlink this entry */
    395 		LIST_REMOVE(eccline, hashchain);
    396 
    397 		KASSERT(eccline->flags & ECC_FLOATING);
    398 
    399 		eccline_blob = eccline->blob;
    400 		memset(eccline, 0, sizeof(struct udf_eccline));
    401 		eccline->flags = ECC_FLOATING;
    402 	} else {
    403 		memset(eccline, 0, sizeof(struct udf_eccline));
    404 		eccline->flags = ECC_FLOATING;
    405 		priv->num_floating++;
    406 	}
    407 
    408 	eccline->queued_on = 0;
    409 	eccline->blob = eccline_blob;
    410 	eccline->buf  = getiobuf(NULL, true);
    411 	eccline->buf->b_private = eccline;	/* IMPORTANT */
    412 
    413 	/* initialise eccline blob */
    414 	memset(eccline->blob, 0, blobsize);
    415 
    416 	eccline->ump = ump;
    417 	eccline->present = eccline->readin = eccline->dirty = 0;
    418 	eccline->error = 0;
    419 	eccline->refcnt = 0;
    420 	eccline->start_sector = start_sector;
    421 
    422 	LIST_INSERT_HEAD(&priv->eccline_hash[line], eccline, hashchain);
    423 
    424 	/*
    425 	 * TODO possible optimalisation for checking overlap with partitions
    426 	 * to get a clue on future eccline usage
    427 	 */
    428 	eccline->refcnt++;
    429 	UDF_LOCK_ECCLINE(eccline);
    430 
    431 	mutex_exit(&priv->discstrat_mutex);
    432 
    433 	return eccline;
    434 }
    435 
    436 
    437 static void
    438 udf_puteccline(struct udf_eccline *eccline)
    439 {
    440 	struct strat_private *priv = PRIV(eccline->ump);
    441 	struct udf_eccline *deccline;
    442 	struct udf_mount *ump = eccline->ump;
    443 	uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
    444 	int newqueue, tries;
    445 
    446 	mutex_enter(&priv->discstrat_mutex);
    447 
    448 	/* clear directly all readin requests from present ones */
    449 	if (eccline->readin & eccline->present) {
    450 		/* clear all read bits that are already read in */
    451 		eccline->readin &= (~eccline->present) & allbits;
    452 		wakeup(eccline);
    453 	}
    454 
    455 	DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n",
    456 		eccline->start_sector, eccline->refcnt));
    457 
    458 	/* requeue */
    459 	newqueue = UDF_SHED_FREE;
    460 	if (eccline->refcnt > 1)
    461 		newqueue = UDF_SHED_IDLE;
    462 	if (eccline->flags & ECC_WANTED)
    463 		newqueue = UDF_SHED_IDLE;
    464 	if (eccline->dirty) {
    465 		newqueue = UDF_SHED_WRITING;
    466 		if (eccline->flags & ECC_SEQWRITING)
    467 			newqueue = UDF_SHED_SEQWRITING;
    468 	}
    469 
    470 	/* if we have active nodes */
    471 	if (eccline->refcnt > 1) {
    472 		/* we dont set it on seqwriting */
    473 		eccline->flags &= ~ECC_SEQWRITING;
    474 	}
    475 
    476 	/* if we need reading in or not all is yet present, queue reading */
    477 	if ((eccline->readin) || (eccline->present != allbits))
    478 		newqueue = UDF_SHED_READING;
    479 
    480 	/* reduce the number of kept free buffers */
    481 	tries = priv->num_queued[UDF_SHED_FREE] - UDF_ECCLINE_MAXFREE;
    482 	while (tries > 0 /* priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE */) {
    483 		deccline = udf_pop_eccline(priv, UDF_SHED_FREE);
    484 		KASSERT(deccline);
    485 		KASSERT(deccline->refcnt == 0);
    486 		if (deccline->flags & ECC_WANTED) {
    487 			udf_push_eccline(deccline, UDF_SHED_IDLE);
    488 			DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n"));
    489 		} else {
    490 			DPRINTF(ECCLINE, ("Removing entry from free list\n"));
    491 			udf_dispose_eccline(deccline);
    492 		}
    493 		tries--;
    494 	}
    495 
    496 	udf_push_eccline(eccline, newqueue);
    497 
    498 	KASSERT(eccline->refcnt >= 1);
    499 	eccline->refcnt--;
    500 	UDF_UNLOCK_ECCLINE(eccline);
    501 
    502 	mutex_exit(&priv->discstrat_mutex);
    503 }
    504 
    505 /* --------------------------------------------------------------------- */
    506 
    507 static int
    508 udf_create_logvol_dscr_rmw(struct udf_strat_args *args)
    509 {
    510 	union dscrptr   **dscrptr  = &args->dscr;
    511 	struct udf_mount *ump      = args->ump;
    512 	struct long_ad   *icb      = args->icb;
    513 	struct udf_eccline *eccline;
    514 	uint64_t bit;
    515 	uint32_t sectornr, lb_size, dummy;
    516 	uint8_t *mem;
    517 	int error, eccsect;
    518 
    519 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
    520 	if (error)
    521 		return error;
    522 
    523 	lb_size  = udf_rw32(ump->logical_vol->lb_size);
    524 
    525 	/* get our eccline */
    526 	eccline = udf_geteccline(ump, sectornr, 0);
    527 	eccsect = sectornr - eccline->start_sector;
    528 
    529 	bit = (uint64_t) 1 << eccsect;
    530 	eccline->readin  &= ~bit;	/* just in case */
    531 	eccline->present |=  bit;
    532 	eccline->dirty   &= ~bit;	/* Err... euhm... clean? */
    533 
    534 	eccline->refcnt++;
    535 
    536 	/* clear space */
    537 	mem = ((uint8_t *) eccline->blob) + eccsect * lb_size;
    538 	memset(mem, 0, lb_size);
    539 
    540 	udf_puteccline(eccline);
    541 
    542 	*dscrptr = (union dscrptr *) mem;
    543 	return 0;
    544 }
    545 
    546 
    547 static void
    548 udf_free_logvol_dscr_rmw(struct udf_strat_args *args)
    549 {
    550 	struct udf_mount *ump  = args->ump;
    551 	struct long_ad   *icb  = args->icb;
    552 	struct udf_eccline *eccline;
    553 	uint64_t bit;
    554 	uint32_t sectornr, dummy;
    555 	int error, eccsect;
    556 
    557 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
    558 	if (error)
    559 		return;
    560 
    561 	/* get our eccline */
    562 	eccline = udf_geteccline(ump, sectornr, 0);
    563 	eccsect = sectornr - eccline->start_sector;
    564 
    565 	bit = (uint64_t) 1 << eccsect;
    566 	eccline->readin &= ~bit;	/* just in case */
    567 
    568 	KASSERT(eccline->refcnt >= 1);
    569 	eccline->refcnt--;
    570 
    571 	udf_puteccline(eccline);
    572 }
    573 
    574 
    575 static int
    576 udf_read_logvol_dscr_rmw(struct udf_strat_args *args)
    577 {
    578 	union dscrptr   **dscrptr = &args->dscr;
    579 	struct udf_mount *ump = args->ump;
    580 	struct long_ad   *icb = args->icb;
    581 	struct udf_eccline *eccline;
    582 	uint64_t bit;
    583 	uint32_t sectornr, dummy;
    584 	uint8_t *pos;
    585 	int sector_size = ump->discinfo.sector_size;
    586 	int lb_size = udf_rw32(ump->logical_vol->lb_size);
    587 	int i, error, dscrlen, eccsect;
    588 
    589 	lb_size = lb_size;
    590 	KASSERT(sector_size == lb_size);
    591 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
    592 	if (error)
    593 		return error;
    594 
    595 	/* get our eccline */
    596 	eccline = udf_geteccline(ump, sectornr, 0);
    597 	eccsect = sectornr - eccline->start_sector;
    598 
    599 	bit = (uint64_t) 1 << eccsect;
    600 	if ((eccline->present & bit) == 0) {
    601 		/* mark bit for readin */
    602 		eccline->readin |= bit;
    603 		eccline->refcnt++;	/* prevent recycling */
    604 		KASSERT(eccline->bufs[eccsect] == NULL);
    605 		udf_puteccline(eccline);
    606 
    607 		/* wait for completion; XXX remodel to lock bit code */
    608 		error = 0;
    609 		while ((eccline->present & bit) == 0) {
    610 			tsleep(eccline, PRIBIO+1, "udflvdrd", hz/8);
    611 			if (eccline->error & bit) {
    612 				KASSERT(eccline->refcnt >= 1);
    613 				eccline->refcnt--;	/* undo temp refcnt */
    614 				*dscrptr = NULL;
    615 				return EIO;		/* XXX error code */
    616 			}
    617 		}
    618 
    619 		/* reget our line */
    620 		eccline = udf_geteccline(ump, sectornr, 0);
    621 		KASSERT(eccline->refcnt >= 1);
    622 		eccline->refcnt--;	/* undo refcnt */
    623 	}
    624 
    625 	*dscrptr = (union dscrptr *)
    626 		(((uint8_t *) eccline->blob) + eccsect * sector_size);
    627 
    628 	/* code from read_phys_descr */
    629 	/* check if its a valid tag */
    630 	error = udf_check_tag(*dscrptr);
    631 	if (error) {
    632 		/* check if its an empty block */
    633 		pos = (uint8_t *) *dscrptr;
    634 		for (i = 0; i < sector_size; i++, pos++) {
    635 			if (*pos) break;
    636 		}
    637 		if (i == sector_size) {
    638 			/* return no error but with no dscrptr */
    639 			error = 0;
    640 		}
    641 		*dscrptr = NULL;
    642 		udf_puteccline(eccline);
    643 		return error;
    644 	}
    645 
    646 	/* calculate descriptor size */
    647 	dscrlen = udf_tagsize(*dscrptr, sector_size);
    648 	error = udf_check_tag_payload(*dscrptr, dscrlen);
    649 	if (error) {
    650 		*dscrptr = NULL;
    651 		udf_puteccline(eccline);
    652 		return error;
    653 	}
    654 
    655 	eccline->refcnt++;
    656 	udf_puteccline(eccline);
    657 
    658 	return 0;
    659 }
    660 
    661 
    662 static int
    663 udf_write_logvol_dscr_rmw(struct udf_strat_args *args)
    664 {
    665 	union dscrptr    *dscrptr = args->dscr;
    666 	struct udf_mount *ump = args->ump;
    667 	struct long_ad   *icb = args->icb;
    668 	struct udf_node *udf_node = args->udf_node;
    669 	struct udf_eccline *eccline;
    670 	uint64_t bit;
    671 	uint32_t sectornr, logsectornr, dummy;
    672 	// int waitfor  = args->waitfor;
    673 	int sector_size = ump->discinfo.sector_size;
    674 	int lb_size = udf_rw32(ump->logical_vol->lb_size);
    675 	int error, eccsect;
    676 
    677 	lb_size = lb_size;
    678 	KASSERT(sector_size == lb_size);
    679 	sectornr    = 0;
    680 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
    681 	if (error)
    682 		return error;
    683 
    684 	/* get our eccline */
    685 	eccline = udf_geteccline(ump, sectornr, 0);
    686 	eccsect = sectornr - eccline->start_sector;
    687 
    688 	bit = (uint64_t) 1 << eccsect;
    689 
    690 	/* old callback still pending? */
    691 	if (eccline->bufs[eccsect]) {
    692 		DPRINTF(WRITE, ("udf_write_logvol_dscr_rmw: writing descriptor"
    693 					" over buffer?\n"));
    694 		nestiobuf_done(eccline->bufs[eccsect],
    695 				eccline->bufs_len[eccsect],
    696 				0);
    697 		eccline->bufs[eccsect] = NULL;
    698 	}
    699 
    700 	UDF_LOCK_NODE(udf_node, IN_CALLBACK_ULK);
    701 
    702 	/* set sector number in the descriptor and validate */
    703 	dscrptr = (union dscrptr *)
    704 		(((uint8_t *) eccline->blob) + eccsect * sector_size);
    705 	KASSERT(dscrptr == args->dscr);
    706 
    707 	logsectornr = udf_rw32(icb->loc.lb_num);
    708 	dscrptr->tag.tag_loc = udf_rw32(logsectornr);
    709 	udf_validate_tag_and_crc_sums(dscrptr);
    710 
    711 	udf_fixup_node_internals(ump, (uint8_t *) dscrptr, UDF_C_NODE);
    712 
    713 	/* set our flags */
    714 	KASSERT(eccline->present & bit);
    715 	eccline->dirty |= bit;
    716 
    717 	KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size);
    718 	UDF_UNLOCK_NODE(udf_node, IN_CALLBACK_ULK);
    719 
    720 	udf_puteccline(eccline);
    721 
    722 	/* XXX waitfor not used */
    723 	return 0;
    724 }
    725 
    726 
    727 static void
    728 udf_queuebuf_rmw(struct udf_strat_args *args)
    729 {
    730 	struct udf_mount *ump = args->ump;
    731 	struct buf *buf = args->nestbuf;
    732 	struct strat_private *priv = PRIV(ump);
    733 	struct udf_eccline *eccline;
    734 	struct long_ad *node_ad_cpy;
    735 	uint64_t bit, *lmapping, *pmapping, *lmappos, *pmappos, blknr;
    736 	uint32_t buf_len, len, sectornr, our_sectornr;
    737 	uint32_t bpos;
    738 	uint8_t *fidblk, *src, *dst;
    739 	int sector_size = ump->discinfo.sector_size;
    740 	int blks = sector_size / DEV_BSIZE;
    741 	int eccsect, what, queue, error;
    742 
    743 	KASSERT(ump);
    744 	KASSERT(buf);
    745 	KASSERT(buf->b_iodone == nestiobuf_iodone);
    746 
    747 	blknr        = buf->b_blkno;
    748 	our_sectornr = blknr / blks;
    749 
    750 	what = buf->b_udf_c_type;
    751 	queue = UDF_SHED_READING;
    752 	if ((buf->b_flags & B_READ) == 0) {
    753 		/* writing */
    754 		queue = UDF_SHED_SEQWRITING;
    755 		if (what == UDF_C_DSCR)
    756 			queue = UDF_SHED_WRITING;
    757 		if (what == UDF_C_NODE)
    758 			queue = UDF_SHED_WRITING;
    759 	}
    760 
    761 	if (queue == UDF_SHED_READING) {
    762 		DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw READ %p : sector %d type %d,"
    763 			"b_resid %d, b_bcount %d, b_bufsize %d\n",
    764 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
    765 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
    766 
    767 		/* mark bits for reading */
    768 		buf_len = buf->b_bcount;
    769 		sectornr = our_sectornr;
    770 		eccline = udf_geteccline(ump, sectornr, 0);
    771 		eccsect = sectornr - eccline->start_sector;
    772 		bpos = 0;
    773 		while (buf_len) {
    774 			len = MIN(buf_len, sector_size);
    775 			if (eccsect == ump->packet_size) {
    776 				udf_puteccline(eccline);
    777 				eccline = udf_geteccline(ump, sectornr, 0);
    778 				eccsect = sectornr - eccline->start_sector;
    779 			}
    780 			bit = (uint64_t) 1 << eccsect;
    781 			error = eccline->error & bit ? EIO : 0;
    782 			if (eccline->present & bit) {
    783 				src = (uint8_t *) eccline->blob +
    784 					eccsect * sector_size;
    785 				dst = (uint8_t *) buf->b_data + bpos;
    786 				if (!error)
    787 					memcpy(dst, src, len);
    788 				nestiobuf_done(buf, len, error);
    789 			} else {
    790 				eccline->readin |= bit;
    791 				KASSERT(eccline->bufs[eccsect] == NULL);
    792 				eccline->bufs[eccsect] = buf;
    793 				eccline->bufs_bpos[eccsect] = bpos;
    794 				eccline->bufs_len[eccsect] = len;
    795 			}
    796 			bpos += sector_size;
    797 			eccsect++;
    798 			sectornr++;
    799 			buf_len -= len;
    800 		}
    801 		udf_puteccline(eccline);
    802 		return;
    803 	}
    804 
    805 	if (queue == UDF_SHED_WRITING) {
    806 		DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw WRITE %p : sector %d "
    807 			"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
    808 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
    809 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
    810 		/* if we have FIDs fixup using buffer's sector number(s) */
    811 		if (buf->b_udf_c_type == UDF_C_FIDS) {
    812 			panic("UDF_C_FIDS in SHED_WRITING!\n");
    813 #if 0
    814 			buf_len = buf->b_bcount;
    815 			sectornr = our_sectornr;
    816 			bpos = 0;
    817 			while (buf_len) {
    818 				len = MIN(buf_len, sector_size);
    819 				fidblk = (uint8_t *) buf->b_data + bpos;
    820 				udf_fixup_fid_block(fidblk, sector_size,
    821 					0, len, sectornr);
    822 				sectornr++;
    823 				bpos += len;
    824 				buf_len -= len;
    825 			}
    826 #endif
    827 		}
    828 		udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
    829 
    830 		/* copy parts into the bufs and set for writing */
    831 		buf_len = buf->b_bcount;
    832 		sectornr = our_sectornr;
    833 		eccline = udf_geteccline(ump, sectornr, 0);
    834 		eccsect = sectornr - eccline->start_sector;
    835 		bpos = 0;
    836 		while (buf_len) {
    837 			len = MIN(buf_len, sector_size);
    838 			if (eccsect == ump->packet_size) {
    839 				udf_puteccline(eccline);
    840 				eccline = udf_geteccline(ump, sectornr, 0);
    841 				eccsect = sectornr - eccline->start_sector;
    842 			}
    843 			bit = (uint64_t) 1 << eccsect;
    844 			KASSERT((eccline->readin & bit) == 0);
    845 			eccline->present |= bit;
    846 			eccline->dirty   |= bit;
    847 			if (eccline->bufs[eccsect]) {
    848 				/* old callback still pending */
    849 				nestiobuf_done(eccline->bufs[eccsect],
    850 						eccline->bufs_len[eccsect],
    851 						0);
    852 				eccline->bufs[eccsect] = NULL;
    853 			}
    854 
    855 			src = (uint8_t *) buf->b_data + bpos;
    856 			dst = (uint8_t *) eccline->blob + eccsect * sector_size;
    857 			if (len != sector_size)
    858 				memset(dst, 0, sector_size);
    859 			memcpy(dst, src, len);
    860 
    861 			/* note that its finished for this extent */
    862 			eccline->bufs[eccsect] = NULL;
    863 			nestiobuf_done(buf, len, 0);
    864 
    865 			bpos += sector_size;
    866 			eccsect++;
    867 			sectornr++;
    868 			buf_len -= len;
    869 		}
    870 		udf_puteccline(eccline);
    871 		return;
    872 
    873 	}
    874 
    875 	/* sequential writing */
    876 	KASSERT(queue == UDF_SHED_SEQWRITING);
    877 	DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw SEQWRITE %p : sector XXXX "
    878 		"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
    879 		buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
    880 		buf->b_bufsize));
    881 	/*
    882 	 * Buffers should not have been allocated to disc addresses yet on
    883 	 * this queue. Note that a buffer can get multiple extents allocated.
    884 	 * Note that it *looks* like the normal writing but its different in
    885 	 * the details.
    886 	 *
    887 	 * lmapping contains lb_num relative to base partition.  pmapping
    888 	 * contains lb_num as used for disc adressing.
    889 	 */
    890 	mutex_enter(&priv->seqwrite_mutex);
    891 
    892 	lmapping    = ump->la_lmapping;
    893 	pmapping    = ump->la_pmapping;
    894 	node_ad_cpy = ump->la_node_ad_cpy;
    895 
    896 	/*
    897 	 * XXX should we try to claim/organize the allocated memory to block
    898 	 * aligned pieces?
    899 	 */
    900 	/* allocate buf and get its logical and physical mappings */
    901 	udf_late_allocate_buf(ump, buf, lmapping, pmapping, node_ad_cpy);
    902 
    903 	/* if we have FIDs, fixup using the new allocation table */
    904 	if (buf->b_udf_c_type == UDF_C_FIDS) {
    905 		buf_len = buf->b_bcount;
    906 		bpos = 0;
    907 		lmappos = lmapping;
    908 		while (buf_len) {
    909 			sectornr = *lmappos++;
    910 			len = MIN(buf_len, sector_size);
    911 			fidblk = (uint8_t *) buf->b_data + bpos;
    912 			udf_fixup_fid_block(fidblk, sector_size,
    913 				0, len, sectornr);
    914 			bpos += len;
    915 			buf_len -= len;
    916 		}
    917 	}
    918 	udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
    919 
    920 	/* copy parts into the bufs and set for writing */
    921 	pmappos = pmapping;
    922 	buf_len = buf->b_bcount;
    923 	sectornr = *pmappos++;
    924 	eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
    925 	eccsect = sectornr - eccline->start_sector;
    926 	bpos = 0;
    927 	while (buf_len) {
    928 		len = MIN(buf_len, sector_size);
    929 		eccsect = sectornr - eccline->start_sector;
    930 		if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
    931 			eccline->flags |= ECC_SEQWRITING;
    932 			udf_puteccline(eccline);
    933 			eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
    934 			eccsect = sectornr - eccline->start_sector;
    935 		}
    936 		bit = (uint64_t) 1 << eccsect;
    937 		KASSERT((eccline->readin & bit) == 0);
    938 		eccline->present |= bit;
    939 		eccline->dirty   |= bit;
    940 		eccline->bufs[eccsect] = NULL;
    941 
    942 		src = (uint8_t *) buf->b_data + bpos;
    943 		dst = (uint8_t *)
    944 			eccline->blob + eccsect * sector_size;
    945 		if (len != sector_size)
    946 			memset(dst, 0, sector_size);
    947 		memcpy(dst, src, len);
    948 
    949 		/* note that its finished for this extent */
    950 		nestiobuf_done(buf, len, 0);
    951 
    952 		bpos += sector_size;
    953 		sectornr = *pmappos++;
    954 		buf_len -= len;
    955 	}
    956 	eccline->flags |= ECC_SEQWRITING;
    957 	udf_puteccline(eccline);
    958 	mutex_exit(&priv->seqwrite_mutex);
    959 }
    960 
    961 /* --------------------------------------------------------------------- */
    962 
    963 static void
    964 udf_shedule_read_callback(struct buf *buf)
    965 {
    966 	struct udf_eccline *eccline = BTOE(buf);
    967 	struct udf_mount *ump = eccline->ump;
    968 	uint64_t bit;
    969 	uint8_t *src, *dst;
    970 	int sector_size = ump->discinfo.sector_size;
    971 	int error, i, len;
    972 
    973 	DPRINTF(ECCLINE, ("read callback called\n"));
    974 	/* post process read action */
    975 	error = buf->b_error;
    976 	for (i = 0; i < ump->packet_size; i++) {
    977 		bit = (uint64_t) 1 << i;
    978 		src = (uint8_t *) buf->b_data +   i * sector_size;
    979 		dst = (uint8_t *) eccline->blob + i * sector_size;
    980 		if (eccline->present & bit)
    981 			continue;
    982 		if (error) {
    983 			eccline->error |= bit;
    984 		} else {
    985 			eccline->present |= bit;
    986 		}
    987 		if (eccline->bufs[i]) {
    988 			dst = (uint8_t *) eccline->bufs[i]->b_data +
    989 				eccline->bufs_bpos[i];
    990 			len = eccline->bufs_len[i];
    991 			if (!error)
    992 				memcpy(dst, src, len);
    993 			nestiobuf_done(eccline->bufs[i], len, error);
    994 			eccline->bufs[i] = NULL;
    995 		}
    996 
    997 	}
    998 	KASSERT(buf->b_data == eccline->blob);
    999 	KASSERT(eccline->present == ((uint64_t) 1 << ump->packet_size)-1);
   1000 
   1001 	/*
   1002 	 * XXX TODO what to do on read errors? read in all sectors
   1003 	 * synchronously and allocate a sparable entry?
   1004 	 */
   1005 
   1006 	wakeup(eccline);
   1007 	udf_puteccline(eccline);
   1008 	DPRINTF(ECCLINE, ("read callback finished\n"));
   1009 }
   1010 
   1011 
   1012 static void
   1013 udf_shedule_write_callback(struct buf *buf)
   1014 {
   1015 	struct udf_eccline *eccline = BTOE(buf);
   1016 	struct udf_mount *ump = eccline->ump;
   1017 	uint64_t bit;
   1018 	int error, i, len;
   1019 
   1020 	DPRINTF(ECCLINE, ("write callback called\n"));
   1021 	/* post process write action */
   1022 	error = buf->b_error;
   1023 	for (i = 0; i < ump->packet_size; i++) {
   1024 		bit = (uint64_t) 1 << i;
   1025 		if ((eccline->dirty & bit) == 0)
   1026 			continue;
   1027 		if (error) {
   1028 			eccline->error |= bit;
   1029 		} else {
   1030 			eccline->dirty &= ~bit;
   1031 		}
   1032 		if (eccline->bufs[i]) {
   1033 			len = eccline->bufs_len[i];
   1034 			nestiobuf_done(eccline->bufs[i], len, error);
   1035 			eccline->bufs[i] = NULL;
   1036 		}
   1037 	}
   1038 	KASSERT(eccline->dirty == 0);
   1039 
   1040 	KASSERT(error == 0);
   1041 	/*
   1042 	 * XXX TODO on write errors allocate a sparable entry
   1043 	 */
   1044 
   1045 	wakeup(eccline);
   1046 	udf_puteccline(eccline);
   1047 }
   1048 
   1049 
   1050 static void
   1051 udf_issue_eccline(struct udf_eccline *eccline, int queued_on)
   1052 {
   1053 	struct udf_mount *ump = eccline->ump;
   1054 	struct strat_private *priv = PRIV(ump);
   1055 	struct buf *buf, *nestbuf;
   1056 	uint64_t bit, allbits = ((uint64_t) 1 << ump->packet_size)-1;
   1057 	uint32_t start;
   1058 	int sector_size = ump->discinfo.sector_size;
   1059 	int blks = sector_size / DEV_BSIZE;
   1060 	int i;
   1061 
   1062 	if (queued_on == UDF_SHED_READING) {
   1063 		DPRINTF(SHEDULE, ("udf_issue_eccline reading : "));
   1064 		/* read all bits that are not yet present */
   1065 		eccline->readin = (~eccline->present) & allbits;
   1066 		KASSERT(eccline->readin);
   1067 		start = eccline->start_sector;
   1068 		buf = eccline->buf;
   1069 		buf_init(buf);
   1070 		buf->b_flags    = B_READ | B_ASYNC;
   1071 		buf->b_cflags   = BC_BUSY;	/* needed? */
   1072 		buf->b_oflags   = 0;
   1073 		buf->b_iodone   = udf_shedule_read_callback;
   1074 		buf->b_data     = eccline->blob;
   1075 		buf->b_bcount   = ump->packet_size * sector_size;
   1076 		buf->b_resid    = buf->b_bcount;
   1077 		buf->b_bufsize  = buf->b_bcount;
   1078 		buf->b_private  = eccline;
   1079 		BIO_SETPRIO(buf, BPRIO_DEFAULT);
   1080 		buf->b_lblkno   = buf->b_blkno = buf->b_rawblkno = start * blks;
   1081 		buf->b_proc     = NULL;
   1082 
   1083 		if (eccline->present != 0) {
   1084 			for (i = 0; i < ump->packet_size; i++) {
   1085 				bit = (uint64_t) 1 << i;
   1086 				if (eccline->present & bit) {
   1087 					nestiobuf_done(buf, sector_size, 0);
   1088 					continue;
   1089 				}
   1090 				nestbuf = getiobuf(NULL, true);
   1091 				nestiobuf_setup(buf, nestbuf, i * sector_size,
   1092 					sector_size);
   1093 				/* adjust blocknumber to read */
   1094 				nestbuf->b_blkno = buf->b_blkno + i*blks;
   1095 				nestbuf->b_rawblkno = buf->b_rawblkno + i*blks;
   1096 
   1097 				DPRINTF(SHEDULE, ("sector %d ",
   1098 					start + i));
   1099 				/* call asynchronous */
   1100 				VOP_STRATEGY(ump->devvp, nestbuf);
   1101 			}
   1102 			DPRINTF(SHEDULE, ("\n"));
   1103 			return;
   1104 		}
   1105 	} else {
   1106 		/* write or seqwrite */
   1107 		DPRINTF(SHEDULE, ("udf_issue_eccline writing or seqwriting : "));
   1108 		if (eccline->present != allbits) {
   1109 			/* requeue to read-only */
   1110 			DPRINTF(SHEDULE, ("\n\t-> not complete, requeue to "
   1111 				"reading\n"));
   1112 			udf_push_eccline(eccline, UDF_SHED_READING);
   1113 			return;
   1114 		}
   1115 		start = eccline->start_sector;
   1116 		buf = eccline->buf;
   1117 		buf_init(buf);
   1118 		buf->b_flags    = B_WRITE | B_ASYNC;
   1119 		buf->b_cflags   = BC_BUSY;	/* needed? */
   1120 		buf->b_oflags   = 0;
   1121 		buf->b_iodone   = udf_shedule_write_callback;
   1122 		buf->b_data     = eccline->blob;
   1123 		buf->b_bcount   = ump->packet_size * sector_size;
   1124 		buf->b_resid    = buf->b_bcount;
   1125 		buf->b_bufsize  = buf->b_bcount;
   1126 		buf->b_private  = eccline;
   1127 		BIO_SETPRIO(buf, BPRIO_DEFAULT);
   1128 		buf->b_lblkno   = buf->b_blkno = buf->b_rawblkno = start * blks;
   1129 		buf->b_proc     = NULL;
   1130 	}
   1131 
   1132 	mutex_exit(&priv->discstrat_mutex);
   1133 		/* call asynchronous */
   1134 		DPRINTF(SHEDULE, ("sector %d for %d\n",
   1135 			start, ump->packet_size));
   1136 		VOP_STRATEGY(ump->devvp, buf);
   1137 	mutex_enter(&priv->discstrat_mutex);
   1138 }
   1139 
   1140 
   1141 static void
   1142 udf_discstrat_thread(void *arg)
   1143 {
   1144 	struct udf_mount *ump = (struct udf_mount *) arg;
   1145 	struct strat_private *priv = PRIV(ump);
   1146 	struct udf_eccline *eccline;
   1147 	struct timespec now, *last;
   1148 	int new_queue, wait, work;
   1149 
   1150 	work = 1;
   1151 	mutex_enter(&priv->discstrat_mutex);
   1152 	priv->num_floating = 0;
   1153 	while (priv->run_thread || work || priv->num_floating) {
   1154 		/* process the current selected queue */
   1155 		/* maintenance: free exess ecclines */
   1156 		while (priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE) {
   1157 			eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
   1158 			KASSERT(eccline);
   1159 			KASSERT(eccline->refcnt == 0);
   1160 			DPRINTF(ECCLINE, ("Removing entry from free list\n"));
   1161 			udf_dispose_eccline(eccline);
   1162 		}
   1163 
   1164 		/* get our time */
   1165 		vfs_timestamp(&now);
   1166 		last = &priv->last_queued[priv->cur_queue];
   1167 
   1168 		/* don't shedule too quickly when there is only one */
   1169 		if (priv->cur_queue == UDF_SHED_WRITING) {
   1170 			if (priv->num_queued[priv->cur_queue] <= 2) {
   1171 				if (now.tv_sec - last->tv_sec < 2) {
   1172 					/* wait some time */
   1173 					cv_timedwait(&priv->discstrat_cv,
   1174 						&priv->discstrat_mutex, hz);
   1175 				}
   1176 			}
   1177 		}
   1178 
   1179 		/* get our line */
   1180 		eccline = udf_pop_eccline(priv, priv->cur_queue);
   1181 		if (eccline) {
   1182 			wait = 0;
   1183 			new_queue = priv->cur_queue;
   1184 			DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n"));
   1185 
   1186 			/* complete the `get' by locking and refcounting it */
   1187 			UDF_LOCK_ECCLINE(eccline);
   1188 			eccline->refcnt++;
   1189 
   1190 			udf_issue_eccline(eccline, priv->cur_queue);
   1191 		} else {
   1192 			wait = 1;
   1193 			/* check if we can/should switch */
   1194 			new_queue = priv->cur_queue;
   1195 			if (BUFQ_PEEK(priv->queues[UDF_SHED_READING]))
   1196 				new_queue = UDF_SHED_READING;
   1197 			if (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]))
   1198 				new_queue = UDF_SHED_WRITING;
   1199 			if (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]))
   1200 				new_queue = UDF_SHED_SEQWRITING;
   1201 
   1202 			/* dont switch seqwriting too fast */
   1203 			if (priv->cur_queue == UDF_SHED_READING) {
   1204 				if (now.tv_sec - last->tv_sec < 1)
   1205 					new_queue = priv->cur_queue;
   1206 			}
   1207 			if (priv->cur_queue == UDF_SHED_WRITING) {
   1208 				if (now.tv_sec - last->tv_sec < 2)
   1209 					new_queue = priv->cur_queue;
   1210 			}
   1211 			if (priv->cur_queue == UDF_SHED_SEQWRITING) {
   1212 				if (now.tv_sec - last->tv_sec < 4)
   1213 					new_queue = priv->cur_queue;
   1214 			}
   1215 		}
   1216 
   1217 		/* give room */
   1218 		mutex_exit(&priv->discstrat_mutex);
   1219 
   1220 		if (new_queue != priv->cur_queue) {
   1221 			wait = 0;
   1222 			DPRINTF(SHEDULE, ("switching from %d to %d\n",
   1223 				priv->cur_queue, new_queue));
   1224 			priv->cur_queue = new_queue;
   1225 		}
   1226 		mutex_enter(&priv->discstrat_mutex);
   1227 
   1228 		/* wait for more if needed */
   1229 		if (wait)
   1230 			cv_timedwait(&priv->discstrat_cv,
   1231 				&priv->discstrat_mutex, hz);	/* /8 */
   1232 
   1233 		work  = (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL);
   1234 		work |= (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL);
   1235 		work |= (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL);
   1236 
   1237 		DPRINTF(PARANOIA, ("work : (%d, %d, %d) -> work %d, float %d\n",
   1238 			(BUFQ_PEEK(priv->queues[UDF_SHED_READING]) != NULL),
   1239 			(BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) != NULL),
   1240 			(BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) != NULL),
   1241 			work, priv->num_floating));
   1242 	}
   1243 
   1244 	mutex_exit(&priv->discstrat_mutex);
   1245 
   1246 	/* tear down remaining ecclines */
   1247 	mutex_enter(&priv->discstrat_mutex);
   1248 	KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0);
   1249 	KASSERT(priv->num_queued[UDF_SHED_READING] == 0);
   1250 	KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0);
   1251 	KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0);
   1252 
   1253 	KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_IDLE]) == NULL);
   1254 	KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_READING]) == NULL);
   1255 	KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) == NULL);
   1256 	KASSERT(BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
   1257 	eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
   1258 	while (eccline) {
   1259 		udf_dispose_eccline(eccline);
   1260 		eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
   1261 	}
   1262 	KASSERT(priv->num_queued[UDF_SHED_FREE] == 0);
   1263 	mutex_exit(&priv->discstrat_mutex);
   1264 
   1265 	priv->thread_finished = 1;
   1266 	wakeup(&priv->run_thread);
   1267 	kthread_exit(0);
   1268 	/* not reached */
   1269 }
   1270 
   1271 /* --------------------------------------------------------------------- */
   1272 
   1273 /*
   1274  * Buffer memory pool allocator.
   1275  */
   1276 
   1277 static void *
   1278 ecclinepool_page_alloc(struct pool *pp, int flags)
   1279 {
   1280         return (void *)uvm_km_alloc(kernel_map,
   1281             MAXBSIZE, MAXBSIZE,
   1282             ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
   1283 	    	| UVM_KMF_WIRED /* UVM_KMF_PAGABLE? */);
   1284 }
   1285 
   1286 static void
   1287 ecclinepool_page_free(struct pool *pp, void *v)
   1288 {
   1289         uvm_km_free(kernel_map, (vaddr_t)v, MAXBSIZE, UVM_KMF_WIRED);
   1290 }
   1291 
   1292 static struct pool_allocator ecclinepool_allocator = {
   1293         .pa_alloc = ecclinepool_page_alloc,
   1294         .pa_free  = ecclinepool_page_free,
   1295         .pa_pagesz = MAXBSIZE,
   1296 };
   1297 
   1298 
   1299 static void
   1300 udf_discstrat_init_rmw(struct udf_strat_args *args)
   1301 {
   1302 	struct udf_mount *ump = args->ump;
   1303 	struct strat_private *priv = PRIV(ump);
   1304 	uint32_t lb_size, blobsize, hashline;
   1305 	int i;
   1306 
   1307 	KASSERT(ump);
   1308 	KASSERT(ump->logical_vol);
   1309 	KASSERT(priv == NULL);
   1310 
   1311 	lb_size = udf_rw32(ump->logical_vol->lb_size);
   1312 	blobsize = ump->packet_size * lb_size;
   1313 	KASSERT(lb_size > 0);
   1314 	KASSERT(ump->packet_size <= 64);
   1315 
   1316 	/* initialise our memory space */
   1317 	ump->strategy_private = malloc(sizeof(struct strat_private),
   1318 		M_UDFTEMP, M_WAITOK);
   1319 	priv = ump->strategy_private;
   1320 	memset(priv, 0 , sizeof(struct strat_private));
   1321 
   1322 	/* initialise locks */
   1323 	cv_init(&priv->discstrat_cv, "udfstrat");
   1324 	mutex_init(&priv->discstrat_mutex, MUTEX_DRIVER, IPL_BIO);
   1325 	mutex_init(&priv->seqwrite_mutex, MUTEX_DEFAULT, IPL_NONE);
   1326 
   1327 	/* initialise struct eccline pool */
   1328 	pool_init(&priv->eccline_pool, sizeof(struct udf_eccline),
   1329 		0, 0, 0, "udf_eccline_pool", NULL, IPL_NONE);
   1330 
   1331 	/* initialise eccline blob pool */
   1332 	pool_init(&priv->ecclineblob_pool, blobsize,
   1333 		0,0,0, "udf_eccline_blob", &ecclinepool_allocator, IPL_NONE);
   1334 
   1335 	/* initialise main queues */
   1336 	for (i = 0; i < UDF_SHED_MAX; i++) {
   1337 		priv->num_queued[i] = 0;
   1338 		vfs_timestamp(&priv->last_queued[i]);
   1339 	}
   1340 	bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
   1341 		BUFQ_SORT_RAWBLOCK);
   1342 	bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
   1343 		BUFQ_SORT_RAWBLOCK);
   1344 	bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "disksort", 0);
   1345 
   1346 	/* initialise administrative queues */
   1347 	bufq_alloc(&priv->queues[UDF_SHED_IDLE], "fcfs", 0);
   1348 	bufq_alloc(&priv->queues[UDF_SHED_FREE], "fcfs", 0);
   1349 
   1350 	for (hashline = 0; hashline < UDF_ECCBUF_HASHSIZE; hashline++) {
   1351 		LIST_INIT(&priv->eccline_hash[hashline]);
   1352 	}
   1353 
   1354 	/* create our disk strategy thread */
   1355 	priv->cur_queue = UDF_SHED_READING;
   1356 	priv->thread_finished = 0;
   1357 	priv->run_thread      = 1;
   1358 	if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
   1359 		udf_discstrat_thread, ump, &priv->queue_lwp,
   1360 		"%s", "udf_rw")) {
   1361 		panic("fork udf_rw");
   1362 	}
   1363 }
   1364 
   1365 
   1366 static void
   1367 udf_discstrat_finish_rmw(struct udf_strat_args *args)
   1368 {
   1369 	struct udf_mount *ump = args->ump;
   1370 	struct strat_private *priv = PRIV(ump);
   1371 	int error;
   1372 
   1373 	if (ump == NULL)
   1374 		return;
   1375 
   1376 	/* stop our sheduling thread */
   1377 	KASSERT(priv->run_thread == 1);
   1378 	priv->run_thread = 0;
   1379 	wakeup(priv->queue_lwp);
   1380 	while (!priv->thread_finished) {
   1381 		error = tsleep(&priv->run_thread, PRIBIO+1,
   1382 			"udfshedfin", hz);
   1383 	}
   1384 	/* kthread should be finished now */
   1385 
   1386 	/* cleanup our pools */
   1387 	pool_destroy(&priv->eccline_pool);
   1388 	pool_destroy(&priv->ecclineblob_pool);
   1389 
   1390 	cv_destroy(&priv->discstrat_cv);
   1391 	mutex_destroy(&priv->discstrat_mutex);
   1392 	mutex_destroy(&priv->seqwrite_mutex);
   1393 
   1394 	/* free our private space */
   1395 	free(ump->strategy_private, M_UDFTEMP);
   1396 	ump->strategy_private = NULL;
   1397 }
   1398 
   1399 /* --------------------------------------------------------------------- */
   1400 
   1401 struct udf_strategy udf_strat_rmw =
   1402 {
   1403 	udf_create_logvol_dscr_rmw,
   1404 	udf_free_logvol_dscr_rmw,
   1405 	udf_read_logvol_dscr_rmw,
   1406 	udf_write_logvol_dscr_rmw,
   1407 	udf_queuebuf_rmw,
   1408 	udf_discstrat_init_rmw,
   1409 	udf_discstrat_finish_rmw
   1410 };
   1411 
   1412