Home | History | Annotate | Line # | Download | only in udf
udf_strat_sequential.c revision 1.2
      1 /* $NetBSD: udf_strat_sequential.c,v 1.2 2008/07/07 18:45:27 reinoud Exp $ */
      2 
      3 /*
      4  * Copyright (c) 2006, 2008 Reinoud Zandijk
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  *
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #ifndef lint
     31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_sequential.c,v 1.2 2008/07/07 18:45:27 reinoud Exp $");
     32 #endif /* not lint */
     33 
     34 
     35 #if defined(_KERNEL_OPT)
     36 #include "opt_quota.h"
     37 #include "opt_compat_netbsd.h"
     38 #endif
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/sysctl.h>
     43 #include <sys/namei.h>
     44 #include <sys/proc.h>
     45 #include <sys/kernel.h>
     46 #include <sys/vnode.h>
     47 #include <miscfs/genfs/genfs_node.h>
     48 #include <sys/mount.h>
     49 #include <sys/buf.h>
     50 #include <sys/file.h>
     51 #include <sys/device.h>
     52 #include <sys/disklabel.h>
     53 #include <sys/ioctl.h>
     54 #include <sys/malloc.h>
     55 #include <sys/dirent.h>
     56 #include <sys/stat.h>
     57 #include <sys/conf.h>
     58 #include <sys/kauth.h>
     59 #include <sys/kthread.h>
     60 #include <dev/clock_subr.h>
     61 
     62 #include <fs/udf/ecma167-udf.h>
     63 #include <fs/udf/udf_mount.h>
     64 
     65 #if defined(_KERNEL_OPT)
     66 #include "opt_udf.h"
     67 #endif
     68 
     69 #include "udf.h"
     70 #include "udf_subr.h"
     71 #include "udf_bswap.h"
     72 
     73 
/*
 * Accessor macros.  The argument is parenthesized so that expressions such
 * as `&obj' or `ptr + 1' bind correctly before the member access.
 */
/* UDF node stored in a vnode's v_data pointer */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
/* this strategy's private state stored in a mount's strategy_private */
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
     76 
     77 /* --------------------------------------------------------------------- */
     78 
/*
 * Indices of the internal buffer queues (BUFQ's) kept by this strategy;
 * they index the `queues' and `last_queued' arrays of struct strat_private.
 */
#define UDF_SHED_READING	0	/* read requests */
#define UDF_SHED_WRITING	1	/* writes that are not sequential */
#define UDF_SHED_SEQWRITING	2	/* sequential writes */

/* number of queues, i.e. one more than the highest index above */
#define UDF_SHED_MAX 3
     85 
     86 struct strat_private {
     87 	struct pool		 desc_pool;	 	/* node descriptors */
     88 
     89 	lwp_t			*queue_lwp;
     90 	kcondvar_t		 discstrat_cv;		/* to wait on       */
     91 	kmutex_t		 discstrat_mutex;	/* disc strategy    */
     92 
     93 	int			 run_thread;		/* thread control */
     94 	int			 cur_queue;
     95 
     96 	struct disk_strategy	 old_strategy_setting;
     97 	struct bufq_state	*queues[UDF_SHED_MAX];
     98 	struct timespec		 last_queued[UDF_SHED_MAX];
     99 };
    100 
    101 
    102 /* --------------------------------------------------------------------- */
    103 
    104 static void
    105 udf_wr_nodedscr_callback(struct buf *buf)
    106 {
    107 	struct udf_node *udf_node;
    108 
    109 	KASSERT(buf);
    110 	KASSERT(buf->b_data);
    111 
    112 	/* called when write action is done */
    113 	DPRINTF(WRITE, ("udf_wr_nodedscr_callback(): node written out\n"));
    114 
    115 	udf_node = VTOI(buf->b_vp);
    116 	if (udf_node == NULL) {
    117 		putiobuf(buf);
    118 		printf("udf_wr_node_callback: NULL node?\n");
    119 		return;
    120 	}
    121 
    122 	/* XXX right flags to mark dirty again on error? */
    123 	if (buf->b_error) {
    124 		udf_node->i_flags |= IN_MODIFIED | IN_ACCESSED;
    125 		/* XXX TODO reshedule on error */
    126 	}
    127 
    128 	/* decrement outstanding_nodedscr */
    129 	KASSERT(udf_node->outstanding_nodedscr >= 1);
    130 	udf_node->outstanding_nodedscr--;
    131 	if (udf_node->outstanding_nodedscr == 0) {
    132 		/* first unlock the node */
    133 		KASSERT(udf_node->i_flags & IN_CALLBACK_ULK);
    134 		UDF_UNLOCK_NODE(udf_node, IN_CALLBACK_ULK);
    135 
    136 		wakeup(&udf_node->outstanding_nodedscr);
    137 	}
    138 
    139 	/* unreference the vnode so it can be recycled */
    140 	holdrele(udf_node->vnode);
    141 
    142 	putiobuf(buf);
    143 }
    144 
    145 /* --------------------------------------------------------------------- */
    146 
    147 static int
    148 udf_create_logvol_dscr_seq(struct udf_strat_args *args)
    149 {
    150 	union dscrptr   **dscrptr = &args->dscr;
    151 	struct udf_mount *ump = args->ump;
    152 	struct strat_private *priv = PRIV(ump);
    153 	uint32_t lb_size;
    154 
    155 	lb_size = udf_rw32(ump->logical_vol->lb_size);
    156 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
    157 	memset(*dscrptr, 0, lb_size);
    158 
    159 	return 0;
    160 }
    161 
    162 
    163 static void
    164 udf_free_logvol_dscr_seq(struct udf_strat_args *args)
    165 {
    166 	union dscrptr    *dscr = args->dscr;
    167 	struct udf_mount *ump  = args->ump;
    168 	struct strat_private *priv = PRIV(ump);
    169 
    170 	pool_put(&priv->desc_pool, dscr);
    171 }
    172 
    173 
    174 static int
    175 udf_read_logvol_dscr_seq(struct udf_strat_args *args)
    176 {
    177 	union dscrptr   **dscrptr = &args->dscr;
    178 	union dscrptr    *tmpdscr;
    179 	struct udf_mount *ump = args->ump;
    180 	struct long_ad   *icb = args->icb;
    181 	struct strat_private *priv = PRIV(ump);
    182 	uint32_t lb_size;
    183 	uint32_t sector, dummy;
    184 	int error;
    185 
    186 	lb_size = udf_rw32(ump->logical_vol->lb_size);
    187 
    188 	error = udf_translate_vtop(ump, icb, &sector, &dummy);
    189 	if (error)
    190 		return error;
    191 
    192 	/* try to read in fe/efe */
    193 	error = udf_read_phys_dscr(ump, sector, M_UDFTEMP, &tmpdscr);
    194 	if (error)
    195 		return error;
    196 
    197 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
    198 	memcpy(*dscrptr, tmpdscr, lb_size);
    199 	free(tmpdscr, M_UDFTEMP);
    200 
    201 	return 0;
    202 }
    203 
    204 
    205 static int
    206 udf_write_logvol_dscr_seq(struct udf_strat_args *args)
    207 {
    208 	union dscrptr    *dscr     = args->dscr;
    209 	struct udf_mount *ump      = args->ump;
    210 	struct udf_node  *udf_node = args->udf_node;
    211 	struct long_ad   *icb      = args->icb;
    212 	int               waitfor  = args->waitfor;
    213 	uint32_t logsectornr, sectornr, dummy;
    214 	int error, vpart;
    215 
    216 	/*
    217 	 * we have to decide if we write it out sequential or at its fixed
    218 	 * position by examining the partition its (to be) written on.
    219 	 */
    220 	vpart       = udf_rw16(udf_node->loc.loc.part_num);
    221 	logsectornr = udf_rw32(icb->loc.lb_num);
    222 	sectornr    = 0;
    223 	if (ump->vtop_tp[vpart] != UDF_VTOP_TYPE_VIRT) {
    224 		error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
    225 		if (error)
    226 			goto out;
    227 	}
    228 
    229 	/* add reference to the vnode to prevent recycling */
    230 	vhold(udf_node->vnode);
    231 
    232 	if (waitfor) {
    233 		DPRINTF(WRITE, ("udf_write_logvol_dscr: sync write\n"));
    234 
    235 		error = udf_write_phys_dscr_sync(ump, udf_node, UDF_C_NODE,
    236 			dscr, sectornr, logsectornr);
    237 	} else {
    238 		DPRINTF(WRITE, ("udf_write_logvol_dscr: no wait, async write\n"));
    239 
    240 		error = udf_write_phys_dscr_async(ump, udf_node, UDF_C_NODE,
    241 			dscr, sectornr, logsectornr, udf_wr_nodedscr_callback);
    242 		/* will be UNLOCKED in call back */
    243 		return error;
    244 	}
    245 
    246 	holdrele(udf_node->vnode);
    247 out:
    248 	udf_node->outstanding_nodedscr--;
    249 	if (udf_node->outstanding_nodedscr == 0) {
    250 		UDF_UNLOCK_NODE(udf_node, 0);
    251 		wakeup(&udf_node->outstanding_nodedscr);
    252 	}
    253 
    254 	return error;
    255 }
    256 
    257 /* --------------------------------------------------------------------- */
    258 
/*
 * Main file-system specific scheduler. Due to the nature of optical media,
 * scheduling can't be performed in the traditional way. Most OS
 * implementations I've seen thus read or write a file atomically, giving all
 * kinds of side effects.
 *
 * This implementation uses a kernel thread to schedule the queued requests in
 * such a way that is semi-optimal for optical media; this means approximately
 * (R*|(Wr*|Ws*))* since switching between reading and writing is expensive in
 * time.
 */
    270 
    271 static void
    272 udf_queuebuf_seq(struct udf_strat_args *args)
    273 {
    274 	struct udf_mount *ump = args->ump;
    275 	struct buf *nestbuf = args->nestbuf;
    276 	struct strat_private *priv = PRIV(ump);
    277 	int queue;
    278 	int what;
    279 
    280 	KASSERT(ump);
    281 	KASSERT(nestbuf);
    282 	KASSERT(nestbuf->b_iodone == nestiobuf_iodone);
    283 
    284 	what = nestbuf->b_udf_c_type;
    285 	queue = UDF_SHED_READING;
    286 	if ((nestbuf->b_flags & B_READ) == 0) {
    287 		/* writing */
    288 		queue = UDF_SHED_SEQWRITING;
    289 		if (what == UDF_C_DSCR)
    290 			queue = UDF_SHED_WRITING;
    291 		if (what == UDF_C_NODE) {
    292 			if (ump->meta_alloc != UDF_ALLOC_VAT)
    293 				queue = UDF_SHED_WRITING;
    294 		}
    295 #if 0
    296 		if (queue == UDF_SHED_SEQWRITING) {
    297 			/* TODO do add sector to uncommitted space */
    298 		}
    299 #endif
    300 	}
    301 
    302 	/* use our own sheduler lists for more complex sheduling */
    303 	mutex_enter(&priv->discstrat_mutex);
    304 		BUFQ_PUT(priv->queues[queue], nestbuf);
    305 		vfs_timestamp(&priv->last_queued[queue]);
    306 	mutex_exit(&priv->discstrat_mutex);
    307 
    308 	/* signal our thread that there might be something to do */
    309 	cv_signal(&priv->discstrat_cv);
    310 }
    311 
    312 /* --------------------------------------------------------------------- */
    313 
    314 /* TODO convert to lb_size */
    315 static void
    316 udf_VAT_mapping_update(struct udf_mount *ump, struct buf *buf)
    317 {
    318 	union dscrptr    *fdscr = (union dscrptr *) buf->b_data;
    319 	struct vnode     *vp = buf->b_vp;
    320 	struct udf_node  *udf_node = VTOI(vp);
    321 	struct part_desc *pdesc;
    322 	uint32_t lb_size, blks;
    323 	uint32_t lb_num, lb_map;
    324 	uint32_t udf_rw32_lbmap;
    325 	int c_type = buf->b_udf_c_type;
    326 	int error;
    327 
    328 	/* only interested when we're using a VAT */
    329 	if (ump->meta_alloc != UDF_ALLOC_VAT)
    330 		return;
    331 	KASSERT(ump->vat_node);
    332 
    333 	/* only nodes are recorded in the VAT */
    334 	/* NOTE: and the fileset descriptor (FIXME ?) */
    335 	if (c_type != UDF_C_NODE)
    336 		return;
    337 
    338 	/* we now have an UDF FE/EFE node on media with VAT (or VAT itself) */
    339 	lb_size = udf_rw32(ump->logical_vol->lb_size);
    340 	blks = lb_size / DEV_BSIZE;
    341 
    342 	/* calculate offset from base partition */
    343 	pdesc = ump->partitions[ump->vtop[ump->metadata_part]];
    344 	lb_map  = buf->b_blkno / blks;
    345 	lb_map -= udf_rw32(pdesc->start_loc);
    346 
    347 	udf_rw32_lbmap = udf_rw32(lb_map);
    348 
    349 	/* if we're the VAT itself, only update our assigned sector number */
    350 	if (udf_node == ump->vat_node) {
    351 		fdscr->tag.tag_loc = udf_rw32_lbmap;
    352 		udf_validate_tag_sum(fdscr);
    353 		DPRINTF(TRANSLATE, ("VAT assigned to sector %u\n",
    354 			udf_rw32(udf_rw32_lbmap)));
    355 		/* no use mapping the VAT node in the VAT */
    356 		return;
    357 	}
    358 
    359 	/* record new position in VAT file */
    360 	lb_num = udf_rw32(fdscr->tag.tag_loc);
    361 
    362 	/* lb_num = udf_rw32(udf_node->write_loc.loc.lb_num); */
    363 
    364 	DPRINTF(TRANSLATE, ("VAT entry change (log %u -> phys %u)\n",
    365 			lb_num, lb_map));
    366 
    367 	/* VAT should be the longer than this write, can't go wrong */
    368 	KASSERT(lb_num <= ump->vat_entries);
    369 
    370 	mutex_enter(&ump->allocate_mutex);
    371 	error = udf_vat_write(ump->vat_node,
    372 			(uint8_t *) &udf_rw32_lbmap, 4,
    373 			ump->vat_offset + lb_num * 4);
    374 	mutex_exit(&ump->allocate_mutex);
    375 
    376 	if (error)
    377 		panic( "udf_VAT_mapping_update: HELP! i couldn't "
    378 			"write in the VAT file ?\n");
    379 }
    380 
    381 
    382 static void
    383 udf_issue_buf(struct udf_mount *ump, int queue, struct buf *buf)
    384 {
    385 	struct long_ad *node_ad_cpy;
    386 	uint64_t *lmapping, *pmapping, *lmappos, blknr;
    387 	uint32_t our_sectornr, sectornr, bpos;
    388 	uint8_t *fidblk;
    389 	int sector_size = ump->discinfo.sector_size;
    390 	int blks = sector_size / DEV_BSIZE;
    391 	int len, buf_len;
    392 
    393 	/* if reading, just pass to the device's STRATEGY */
    394 	if (queue == UDF_SHED_READING) {
    395 		DPRINTF(SHEDULE, ("\nudf_issue_buf READ %p : sector %d type %d,"
    396 			"b_resid %d, b_bcount %d, b_bufsize %d\n",
    397 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
    398 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
    399 		VOP_STRATEGY(ump->devvp, buf);
    400 		return;
    401 	}
    402 
    403 	blknr        = buf->b_blkno;
    404 	our_sectornr = blknr / blks;
    405 
    406 	if (queue == UDF_SHED_WRITING) {
    407 		DPRINTF(SHEDULE, ("\nudf_issue_buf WRITE %p : sector %d "
    408 			"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
    409 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
    410 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
    411 		/* if we have FIDs fixup using buffer's sector number(s) */
    412 		if (buf->b_udf_c_type == UDF_C_FIDS) {
    413 			panic("UDF_C_FIDS in SHED_WRITING!\n");
    414 			buf_len = buf->b_bcount;
    415 			sectornr = our_sectornr;
    416 			bpos = 0;
    417 			while (buf_len) {
    418 				len = MIN(buf_len, sector_size);
    419 				fidblk = (uint8_t *) buf->b_data + bpos;
    420 				udf_fixup_fid_block(fidblk, sector_size,
    421 					0, len, sectornr);
    422 				sectornr++;
    423 				bpos += len;
    424 				buf_len -= len;
    425 			}
    426 		}
    427 		udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
    428 		VOP_STRATEGY(ump->devvp, buf);
    429 		return;
    430 	}
    431 
    432 	KASSERT(queue == UDF_SHED_SEQWRITING);
    433 	DPRINTF(SHEDULE, ("\nudf_issue_buf SEQWRITE %p : sector XXXX "
    434 		"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
    435 		buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
    436 		buf->b_bufsize));
    437 
    438 	/*
    439 	 * Buffers should not have been allocated to disc addresses yet on
    440 	 * this queue. Note that a buffer can get multiple extents allocated.
    441 	 *
    442 	 * lmapping contains lb_num relative to base partition.
    443 	 * pmapping contains lb_num as used for disc adressing.
    444 	 */
    445 	lmapping    = ump->la_lmapping;
    446 	pmapping    = ump->la_pmapping;
    447 	node_ad_cpy = ump->la_node_ad_cpy;
    448 
    449 	/* allocate buf and get its logical and physical mappings */
    450 	udf_late_allocate_buf(ump, buf, lmapping, pmapping, node_ad_cpy);
    451 	udf_VAT_mapping_update(ump, buf);	/* XXX could pass *lmapping */
    452 
    453 	/* if we have FIDs, fixup using the new allocation table */
    454 	if (buf->b_udf_c_type == UDF_C_FIDS) {
    455 		buf_len = buf->b_bcount;
    456 		bpos = 0;
    457 		lmappos = lmapping;
    458 		while (buf_len) {
    459 			sectornr = *lmappos++;
    460 			len = MIN(buf_len, sector_size);
    461 			fidblk = (uint8_t *) buf->b_data + bpos;
    462 			udf_fixup_fid_block(fidblk, sector_size,
    463 				0, len, sectornr);
    464 			bpos += len;
    465 			buf_len -= len;
    466 		}
    467 	}
    468 	udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
    469 	VOP_STRATEGY(ump->devvp, buf);
    470 }
    471 
    472 
    473 static void
    474 udf_doshedule(struct udf_mount *ump)
    475 {
    476 	struct buf *buf;
    477 	struct timespec now, *last;
    478 	struct strat_private *priv = PRIV(ump);
    479 	void (*b_callback)(struct buf *);
    480 	int new_queue;
    481 	int error;
    482 
    483 	buf = BUFQ_GET(priv->queues[priv->cur_queue]);
    484 	if (buf) {
    485 		/* transfer from the current queue to the device queue */
    486 		mutex_exit(&priv->discstrat_mutex);
    487 
    488 		/* transform buffer to synchronous; XXX needed? */
    489 		b_callback = buf->b_iodone;
    490 		buf->b_iodone = NULL;
    491 		CLR(buf->b_flags, B_ASYNC);
    492 
    493 		/* issue and wait on completion */
    494 		udf_issue_buf(ump, priv->cur_queue, buf);
    495 		biowait(buf);
    496 
    497 		mutex_enter(&priv->discstrat_mutex);
    498 
    499 		/* if there is an error, repair this error, otherwise propagate */
    500 		if (buf->b_error && ((buf->b_flags & B_READ) == 0)) {
    501 			/* check what we need to do */
    502 			panic("UDF write error, can't handle yet!\n");
    503 		}
    504 
    505 		/* propagate result to higher layers */
    506 		if (b_callback) {
    507 			buf->b_iodone = b_callback;
    508 			(*buf->b_iodone)(buf);
    509 		}
    510 
    511 		return;
    512 	}
    513 
    514 	/* Check if we're idling in this state */
    515 	vfs_timestamp(&now);
    516 	last = &priv->last_queued[priv->cur_queue];
    517 	if (ump->discinfo.mmc_class == MMC_CLASS_CD) {
    518 		/* dont switch too fast for CD media; its expensive in time */
    519 		if (now.tv_sec - last->tv_sec < 3)
    520 			return;
    521 	}
    522 
    523 	/* check if we can/should switch */
    524 	new_queue = priv->cur_queue;
    525 
    526 	if (BUFQ_PEEK(priv->queues[UDF_SHED_READING]))
    527 		new_queue = UDF_SHED_READING;
    528 	if (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]))
    529 		new_queue = UDF_SHED_SEQWRITING;
    530 	if (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]))		/* only for unmount */
    531 		new_queue = UDF_SHED_WRITING;
    532 	if (priv->cur_queue == UDF_SHED_READING) {
    533 		if (new_queue == UDF_SHED_SEQWRITING) {
    534 			/* TODO use flag to signal if this is needed */
    535 			mutex_exit(&priv->discstrat_mutex);
    536 
    537 			/* update trackinfo for data and metadata */
    538 			error = udf_update_trackinfo(ump,
    539 					&ump->data_track);
    540 			assert(error == 0);
    541 			error = udf_update_trackinfo(ump,
    542 					&ump->metadata_track);
    543 			assert(error == 0);
    544 			mutex_enter(&priv->discstrat_mutex);
    545 		}
    546 	}
    547 
    548 	if (new_queue != priv->cur_queue) {
    549 		DPRINTF(SHEDULE, ("switching from %d to %d\n",
    550 			priv->cur_queue, new_queue));
    551 	}
    552 
    553 	priv->cur_queue = new_queue;
    554 }
    555 
    556 
    557 static void
    558 udf_discstrat_thread(void *arg)
    559 {
    560 	struct udf_mount *ump = (struct udf_mount *) arg;
    561 	struct strat_private *priv = PRIV(ump);
    562 	int empty;
    563 
    564 	empty = 1;
    565 	mutex_enter(&priv->discstrat_mutex);
    566 	while (priv->run_thread || !empty) {
    567 		/* process the current selected queue */
    568 		udf_doshedule(ump);
    569 		empty  = (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) == NULL);
    570 		empty &= (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) == NULL);
    571 		empty &= (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
    572 
    573 		/* wait for more if needed */
    574 		if (empty)
    575 			cv_timedwait(&priv->discstrat_cv,
    576 				&priv->discstrat_mutex, hz/8);
    577 	}
    578 	mutex_exit(&priv->discstrat_mutex);
    579 
    580 	wakeup(&priv->run_thread);
    581 	kthread_exit(0);
    582 	/* not reached */
    583 }
    584 
    585 /* --------------------------------------------------------------------- */
    586 
    587 static void
    588 udf_discstrat_init_seq(struct udf_strat_args *args)
    589 {
    590 	struct udf_mount *ump = args->ump;
    591 	struct strat_private *priv = PRIV(ump);
    592 	struct disk_strategy dkstrat;
    593 	uint32_t lb_size;
    594 
    595 	KASSERT(ump);
    596 	KASSERT(ump->logical_vol);
    597 	KASSERT(priv == NULL);
    598 
    599 	lb_size = udf_rw32(ump->logical_vol->lb_size);
    600 	KASSERT(lb_size > 0);
    601 
    602 	/* initialise our memory space */
    603 	ump->strategy_private = malloc(sizeof(struct strat_private),
    604 		M_UDFTEMP, M_WAITOK);
    605 	priv = ump->strategy_private;
    606 	memset(priv, 0 , sizeof(struct strat_private));
    607 
    608 	/* initialise locks */
    609 	cv_init(&priv->discstrat_cv, "udfstrat");
    610 	mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE);
    611 
    612 	/*
    613 	 * Initialise pool for descriptors associated with nodes. This is done
    614 	 * in lb_size units though currently lb_size is dictated to be
    615 	 * sector_size.
    616 	 */
    617 	pool_init(&priv->desc_pool, lb_size, 0, 0, 0, "udf_desc_pool", NULL,
    618 	    IPL_NONE);
    619 
    620 	/*
    621 	 * remember old device strategy method and explicit set method
    622 	 * `discsort' since we have our own more complex strategy that is not
    623 	 * implementable on the CD device and other strategies will get in the
    624 	 * way.
    625 	 */
    626 	memset(&priv->old_strategy_setting, 0,
    627 		sizeof(struct disk_strategy));
    628 	VOP_IOCTL(ump->devvp, DIOCGSTRATEGY, &priv->old_strategy_setting,
    629 		FREAD | FKIOCTL, NOCRED);
    630 	memset(&dkstrat, 0, sizeof(struct disk_strategy));
    631 	strcpy(dkstrat.dks_name, "discsort");
    632 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &dkstrat, FWRITE | FKIOCTL,
    633 		NOCRED);
    634 
    635 	/* initialise our internal sheduler */
    636 	priv->cur_queue = UDF_SHED_READING;
    637 	bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
    638 		BUFQ_SORT_RAWBLOCK);
    639 	bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
    640 		BUFQ_SORT_RAWBLOCK);
    641 	bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "fcfs", 0);
    642 	vfs_timestamp(&priv->last_queued[UDF_SHED_READING]);
    643 	vfs_timestamp(&priv->last_queued[UDF_SHED_WRITING]);
    644 	vfs_timestamp(&priv->last_queued[UDF_SHED_SEQWRITING]);
    645 
    646 	/* create our disk strategy thread */
    647 	priv->run_thread = 1;
    648 	if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
    649 		udf_discstrat_thread, ump, &priv->queue_lwp,
    650 		"%s", "udf_rw")) {
    651 		panic("fork udf_rw");
    652 	}
    653 }
    654 
    655 
    656 static void
    657 udf_discstrat_finish_seq(struct udf_strat_args *args)
    658 {
    659 	struct udf_mount *ump = args->ump;
    660 	struct strat_private *priv = PRIV(ump);
    661 	int error;
    662 
    663 	if (ump == NULL)
    664 		return;
    665 
    666 	/* stop our sheduling thread */
    667 	KASSERT(priv->run_thread == 1);
    668 	priv->run_thread = 0;
    669 	wakeup(priv->queue_lwp);
    670 	do {
    671 		error = tsleep(&priv->run_thread, PRIBIO+1,
    672 			"udfshedfin", hz);
    673 	} while (error);
    674 	/* kthread should be finished now */
    675 
    676 	/* set back old device strategy method */
    677 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &priv->old_strategy_setting,
    678 			FWRITE, NOCRED);
    679 
    680 	/* destroy our pool */
    681 	pool_destroy(&priv->desc_pool);
    682 
    683 	/* free our private space */
    684 	free(ump->strategy_private, M_UDFTEMP);
    685 	ump->strategy_private = NULL;
    686 }
    687 
    688 /* --------------------------------------------------------------------- */
    689 
    690 struct udf_strategy udf_strat_sequential =
    691 {
    692 	udf_create_logvol_dscr_seq,
    693 	udf_free_logvol_dscr_seq,
    694 	udf_read_logvol_dscr_seq,
    695 	udf_write_logvol_dscr_seq,
    696 	udf_queuebuf_seq,
    697 	udf_discstrat_init_seq,
    698 	udf_discstrat_finish_seq
    699 };
    700 
    701 
    702