Home | History | Annotate | Line # | Download | only in udf
      1 /* $NetBSD: udf_strat_sequential.c,v 1.20 2023/06/27 09:58:50 reinoud Exp $ */
      2 
      3 /*
      4  * Copyright (c) 2006, 2008 Reinoud Zandijk
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  *
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #ifndef lint
     31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_sequential.c,v 1.20 2023/06/27 09:58:50 reinoud Exp $");
     32 #endif /* not lint */
     33 
     34 
     35 #if defined(_KERNEL_OPT)
     36 #include "opt_compat_netbsd.h"
     37 #endif
     38 
     39 #include <sys/param.h>
     40 #include <sys/systm.h>
     41 #include <sys/sysctl.h>
     42 #include <sys/namei.h>
     43 #include <sys/proc.h>
     44 #include <sys/kernel.h>
     45 #include <sys/vnode.h>
     46 #include <miscfs/genfs/genfs_node.h>
     47 #include <sys/mount.h>
     48 #include <sys/buf.h>
     49 #include <sys/file.h>
     50 #include <sys/device.h>
     51 #include <sys/disklabel.h>
     52 #include <sys/ioctl.h>
     53 #include <sys/malloc.h>
     54 #include <sys/dirent.h>
     55 #include <sys/stat.h>
     56 #include <sys/conf.h>
     57 #include <sys/kauth.h>
     58 #include <sys/kthread.h>
     59 #include <dev/clock_subr.h>
     60 
     61 #include <fs/udf/ecma167-udf.h>
     62 #include <fs/udf/udf_mount.h>
     63 
     64 #include "udf.h"
     65 #include "udf_subr.h"
     66 #include "udf_bswap.h"
     67 
     68 
/*
 * Convenience accessors: VTOI() yields the udf_node hanging off a vnode's
 * v_data, PRIV() yields this strategy's private state hanging off a mount's
 * strategy_private. The macro arguments are parenthesised so that arbitrary
 * pointer expressions (e.g. `vp + 1') expand correctly.
 */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
     71 
     72 /* --------------------------------------------------------------------- */
     73 
     74 /* BUFQ's */
     75 #define UDF_SHED_MAX 3
     76 
     77 #define UDF_SHED_READING	0
     78 #define UDF_SHED_WRITING	1
     79 #define UDF_SHED_SEQWRITING	2
     80 
     81 struct strat_private {
     82 	struct pool		 desc_pool;	 	/* node descriptors */
     83 
     84 	lwp_t			*queue_lwp;
     85 	kcondvar_t		 discstrat_cv;		/* to wait on       */
     86 	kmutex_t		 discstrat_mutex;	/* disc strategy    */
     87 
     88 	int			 thread_running;	/* thread control */
     89 	int			 run_thread;		/* thread control */
     90 	int			 thread_finished;	/* thread control */
     91 
     92 	int			 sync_req;		/* thread control */
     93 	int			 cur_queue;
     94 
     95 	struct disk_strategy	 old_strategy_setting;
     96 	struct bufq_state	*queues[UDF_SHED_MAX];
     97 	struct timespec		 last_queued[UDF_SHED_MAX];
     98 };
     99 
    100 
    101 /* --------------------------------------------------------------------- */
    102 
    103 static void
    104 udf_wr_nodedscr_callback(struct buf *buf)
    105 {
    106 	struct udf_node *udf_node;
    107 
    108 	KASSERT(buf);
    109 	KASSERT(buf->b_data);
    110 
    111 	/* called when write action is done */
    112 	DPRINTF(WRITE, ("udf_wr_nodedscr_callback(): node written out\n"));
    113 
    114 	udf_node = VTOI(buf->b_vp);
    115 	if (udf_node == NULL) {
    116 		putiobuf(buf);
    117 		printf("udf_wr_node_callback: NULL node?\n");
    118 		return;
    119 	}
    120 
    121 	/* XXX right flags to mark dirty again on error? */
    122 	if (buf->b_error) {
    123 		udf_node->i_flags |= IN_MODIFIED | IN_ACCESSED;
    124 		/* XXX TODO reschedule on error */
    125 	}
    126 
    127 	/* decrement outstanding_nodedscr */
    128 	KASSERT(udf_node->outstanding_nodedscr >= 1);
    129 	udf_node->outstanding_nodedscr--;
    130 	if (udf_node->outstanding_nodedscr == 0) {
    131 		/* first unlock the node */
    132 		UDF_UNLOCK_NODE(udf_node, 0);
    133 		cv_broadcast(&udf_node->node_lock);
    134 	}
    135 
    136 	putiobuf(buf);
    137 }
    138 
    139 /* --------------------------------------------------------------------- */
    140 
    141 static int
    142 udf_create_logvol_dscr_seq(struct udf_strat_args *args)
    143 {
    144 	union dscrptr   **dscrptr = &args->dscr;
    145 	struct udf_mount *ump = args->ump;
    146 	struct strat_private *priv = PRIV(ump);
    147 	uint32_t lb_size;
    148 
    149 	lb_size = udf_rw32(ump->logical_vol->lb_size);
    150 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
    151 	memset(*dscrptr, 0, lb_size);
    152 
    153 	return 0;
    154 }
    155 
    156 
    157 static void
    158 udf_free_logvol_dscr_seq(struct udf_strat_args *args)
    159 {
    160 	union dscrptr    *dscr = args->dscr;
    161 	struct udf_mount *ump  = args->ump;
    162 	struct strat_private *priv = PRIV(ump);
    163 
    164 	pool_put(&priv->desc_pool, dscr);
    165 }
    166 
    167 
    168 static int
    169 udf_read_logvol_dscr_seq(struct udf_strat_args *args)
    170 {
    171 	union dscrptr   **dscrptr = &args->dscr;
    172 	union dscrptr    *tmpdscr;
    173 	struct udf_mount *ump = args->ump;
    174 	struct long_ad   *icb = args->icb;
    175 	struct strat_private *priv = PRIV(ump);
    176 	uint32_t lb_size;
    177 	uint32_t sector, dummy;
    178 	int error;
    179 
    180 	lb_size = udf_rw32(ump->logical_vol->lb_size);
    181 
    182 	error = udf_translate_vtop(ump, icb, &sector, &dummy);
    183 	if (error)
    184 		return error;
    185 
    186 	/* try to read in fe/efe */
    187 	error = udf_read_phys_dscr(ump, sector, M_UDFTEMP, &tmpdscr);
    188 	if (error)
    189 		return error;
    190 
    191 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
    192 	memcpy(*dscrptr, tmpdscr, lb_size);
    193 	free(tmpdscr, M_UDFTEMP);
    194 
    195 	return 0;
    196 }
    197 
    198 
    199 static int
    200 udf_write_logvol_dscr_seq(struct udf_strat_args *args)
    201 {
    202 	union dscrptr    *dscr     = args->dscr;
    203 	struct udf_mount *ump      = args->ump;
    204 	struct udf_node  *udf_node = args->udf_node;
    205 	struct long_ad   *icb      = args->icb;
    206 	int               waitfor  = args->waitfor;
    207 	uint32_t logsectornr, sectornr, dummy;
    208 	int error, vpart;
    209 
    210 	/*
    211 	 * we have to decide if we write it out sequential or at its fixed
    212 	 * position by examining the partition its (to be) written on.
    213 	 */
    214 	vpart       = udf_rw16(udf_node->loc.loc.part_num);
    215 	logsectornr = udf_rw32(icb->loc.lb_num);
    216 	sectornr    = 0;
    217 	if (ump->vtop_tp[vpart] != UDF_VTOP_TYPE_VIRT) {
    218 		error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
    219 		if (error)
    220 			goto out;
    221 	}
    222 
    223 	if (waitfor) {
    224 		DPRINTF(WRITE, ("udf_write_logvol_dscr: sync write\n"));
    225 
    226 		error = udf_write_phys_dscr_sync(ump, udf_node, UDF_C_NODE,
    227 			dscr, sectornr, logsectornr);
    228 	} else {
    229 		DPRINTF(WRITE, ("udf_write_logvol_dscr: no wait, async write\n"));
    230 
    231 		error = udf_write_phys_dscr_async(ump, udf_node, UDF_C_NODE,
    232 			dscr, sectornr, logsectornr, udf_wr_nodedscr_callback);
    233 		/* will be UNLOCKED in call back */
    234 		return error;
    235 	}
    236 out:
    237 	udf_node->outstanding_nodedscr--;
    238 	if (udf_node->outstanding_nodedscr == 0) {
    239 		UDF_UNLOCK_NODE(udf_node, 0);
    240 		cv_broadcast(&udf_node->node_lock);
    241 	}
    242 
    243 	return error;
    244 }
    245 
    246 /* --------------------------------------------------------------------- */
    247 
/*
 * Main file-system specific scheduler. Due to the nature of optical media,
 * scheduling can't be performed in the traditional way. Most OS
 * implementations I've seen therefore read or write a file atomically, which
 * gives all kinds of side effects.
 *
 * This implementation uses a kernel thread to schedule the queued requests in
 * a way that is semi-optimal for optical media; this means approximately
 * (R*|(Wr*|Ws*))* since switching between reading and writing is expensive in
 * time.
 */
    259 
    260 static void
    261 udf_queuebuf_seq(struct udf_strat_args *args)
    262 {
    263 	struct udf_mount *ump = args->ump;
    264 	struct buf *nestbuf = args->nestbuf;
    265 	struct strat_private *priv = PRIV(ump);
    266 	int queue;
    267 	int what;
    268 
    269 	KASSERT(ump);
    270 	KASSERT(nestbuf);
    271 	KASSERT(nestbuf->b_iodone == nestiobuf_iodone);
    272 
    273 	what = nestbuf->b_udf_c_type;
    274 	queue = UDF_SHED_READING;
    275 	if ((nestbuf->b_flags & B_READ) == 0) {
    276 		/* writing */
    277 		queue = UDF_SHED_SEQWRITING;
    278 		if (what == UDF_C_ABSOLUTE)
    279 			queue = UDF_SHED_WRITING;
    280 	}
    281 
    282 	/* use our own scheduler lists for more complex scheduling */
    283 	mutex_enter(&priv->discstrat_mutex);
    284 		bufq_put(priv->queues[queue], nestbuf);
    285 		vfs_timestamp(&priv->last_queued[queue]);
    286 	mutex_exit(&priv->discstrat_mutex);
    287 
    288 	/* signal our thread that there might be something to do */
    289 	cv_signal(&priv->discstrat_cv);
    290 }
    291 
    292 /* --------------------------------------------------------------------- */
    293 
    294 static void
    295 udf_sync_caches_seq(struct udf_strat_args *args)
    296 {
    297 	struct udf_mount *ump = args->ump;
    298 	struct strat_private *priv = PRIV(ump);
    299 
    300 	/* we might be called during unmount inadvertedly, be on safe side */
    301 	if (!priv)
    302 		return;
    303 
    304 	/* signal our thread that there might be something to do */
    305 	priv->sync_req = 1;
    306 	cv_signal(&priv->discstrat_cv);
    307 
    308 	mutex_enter(&priv->discstrat_mutex);
    309 		while (priv->sync_req) {
    310 			cv_timedwait(&priv->discstrat_cv,
    311 				&priv->discstrat_mutex, hz/8);
    312 		}
    313 	mutex_exit(&priv->discstrat_mutex);
    314 }
    315 
    316 /* --------------------------------------------------------------------- */
    317 
    318 /* TODO convert to lb_size */
    319 static void
    320 udf_VAT_mapping_update(struct udf_mount *ump, struct buf *buf, uint32_t lb_map)
    321 {
    322 	union dscrptr    *fdscr = (union dscrptr *) buf->b_data;
    323 	struct vnode     *vp = buf->b_vp;
    324 	struct udf_node  *udf_node = VTOI(vp);
    325 	uint32_t lb_num;
    326 	uint32_t udf_rw32_lbmap;
    327 	int c_type = buf->b_udf_c_type;
    328 	int error;
    329 
    330 	/* only interested when we're using a VAT */
    331 	KASSERT(ump->vat_node);
    332 	KASSERT(ump->vtop_alloc[ump->node_part] == UDF_ALLOC_VAT);
    333 
    334 	/* only nodes are recorded in the VAT */
    335 	/* NOTE: and the fileset descriptor (FIXME ?) */
    336 	if (c_type != UDF_C_NODE)
    337 		return;
    338 
    339 	udf_rw32_lbmap = udf_rw32(lb_map);
    340 
    341 	/* if we're the VAT itself, only update our assigned sector number */
    342 	if (udf_node == ump->vat_node) {
    343 		fdscr->tag.tag_loc = udf_rw32_lbmap;
    344 		udf_validate_tag_sum(fdscr);
    345 		DPRINTF(TRANSLATE, ("VAT assigned to sector %u\n",
    346 			udf_rw32(udf_rw32_lbmap)));
    347 		/* no use mapping the VAT node in the VAT */
    348 		return;
    349 	}
    350 
    351 	/* record new position in VAT file */
    352 	lb_num = udf_rw32(fdscr->tag.tag_loc);
    353 
    354 	/* lb_num = udf_rw32(udf_node->write_loc.loc.lb_num); */
    355 
    356 	DPRINTF(TRANSLATE, ("VAT entry change (log %u -> phys %u)\n",
    357 			lb_num, lb_map));
    358 
    359 	/* VAT should be the longer than this write, can't go wrong */
    360 	KASSERT(lb_num <= ump->vat_entries);
    361 
    362 	mutex_enter(&ump->allocate_mutex);
    363 	error = udf_vat_write(ump->vat_node,
    364 			(uint8_t *) &udf_rw32_lbmap, 4,
    365 			ump->vat_offset + lb_num * 4);
    366 	mutex_exit(&ump->allocate_mutex);
    367 
    368 	if (error)
    369 		panic( "udf_VAT_mapping_update: HELP! i couldn't "
    370 			"write in the VAT file ?\n");
    371 }
    372 
    373 
    374 static void
    375 udf_issue_buf(struct udf_mount *ump, int queue, struct buf *buf)
    376 {
    377 	union dscrptr *dscr;
    378 	struct long_ad *node_ad_cpy;
    379 	struct part_desc *pdesc;
    380 	uint64_t *lmapping, *lmappos;
    381 	uint32_t sectornr, bpos;
    382 	uint32_t ptov;
    383 	uint16_t vpart_num;
    384 	uint8_t *fidblk;
    385 	int sector_size = ump->discinfo.sector_size;
    386 	int blks = sector_size / DEV_BSIZE;
    387 	int len, buf_len;
    388 
    389 	/* if reading, just pass to the device's STRATEGY */
    390 	if (queue == UDF_SHED_READING) {
    391 		DPRINTF(SHEDULE, ("\nudf_issue_buf READ %p : sector %d type %d,"
    392 			"b_resid %d, b_bcount %d, b_bufsize %d\n",
    393 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
    394 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
    395 		VOP_STRATEGY(ump->devvp, buf);
    396 		return;
    397 	}
    398 
    399 	if (queue == UDF_SHED_WRITING) {
    400 		DPRINTF(SHEDULE, ("\nudf_issue_buf WRITE %p : sector %d "
    401 			"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
    402 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
    403 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
    404 		KASSERT(buf->b_udf_c_type == UDF_C_ABSOLUTE);
    405 
    406 		// udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
    407 		VOP_STRATEGY(ump->devvp, buf);
    408 		return;
    409 	}
    410 
    411 	KASSERT(queue == UDF_SHED_SEQWRITING);
    412 	DPRINTF(SHEDULE, ("\nudf_issue_buf SEQWRITE %p : sector XXXX "
    413 		"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
    414 		buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
    415 		buf->b_bufsize));
    416 
    417 	/*
    418 	 * Buffers should not have been allocated to disc addresses yet on
    419 	 * this queue. Note that a buffer can get multiple extents allocated.
    420 	 *
    421 	 * lmapping contains lb_num relative to base partition.
    422 	 */
    423 	lmapping    = ump->la_lmapping;
    424 	node_ad_cpy = ump->la_node_ad_cpy;
    425 
    426 	/* logically allocate buf and map it in the file */
    427 	udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
    428 
    429 	/*
    430 	 * NOTE We are using the knowledge here that sequential media will
    431 	 * always be mapped linearly. Thus no use to explicitly translate the
    432 	 * lmapping list.
    433 	 */
    434 
    435 	/* calculate offset from physical base partition */
    436 	pdesc = ump->partitions[ump->vtop[vpart_num]];
    437 	ptov  = udf_rw32(pdesc->start_loc);
    438 
    439 	/* set buffers blkno to the physical block number */
    440 	buf->b_blkno = (*lmapping + ptov) * blks;
    441 
    442 	/* fixate floating descriptors */
    443 	if (buf->b_udf_c_type == UDF_C_FLOAT_DSCR) {
    444 		/* set our tag location to the absolute position */
    445 		dscr = (union dscrptr *) buf->b_data;
    446 		dscr->tag.tag_loc = udf_rw32(*lmapping + ptov);
    447 		udf_validate_tag_and_crc_sums(dscr);
    448 	}
    449 
    450 	/* update mapping in the VAT */
    451 	if (buf->b_udf_c_type == UDF_C_NODE) {
    452 		udf_VAT_mapping_update(ump, buf, *lmapping);
    453 		udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
    454 	}
    455 
    456 	/* if we have FIDs, fixup using the new allocation table */
    457 	if (buf->b_udf_c_type == UDF_C_FIDS) {
    458 		buf_len = buf->b_bcount;
    459 		bpos = 0;
    460 		lmappos = lmapping;
    461 		while (buf_len) {
    462 			sectornr = *lmappos++;
    463 			len = MIN(buf_len, sector_size);
    464 			fidblk = (uint8_t *) buf->b_data + bpos;
    465 			udf_fixup_fid_block(fidblk, sector_size,
    466 				0, len, sectornr);
    467 			bpos += len;
    468 			buf_len -= len;
    469 		}
    470 	}
    471 
    472 	VOP_STRATEGY(ump->devvp, buf);
    473 }
    474 
    475 
    476 static void
    477 udf_doshedule(struct udf_mount *ump)
    478 {
    479 	struct buf *buf;
    480 	struct timespec now, *last;
    481 	struct strat_private *priv = PRIV(ump);
    482 	void (*b_callback)(struct buf *);
    483 	int new_queue;
    484 	int error;
    485 
    486 	buf = bufq_get(priv->queues[priv->cur_queue]);
    487 	if (buf) {
    488 		/* transfer from the current queue to the device queue */
    489 		mutex_exit(&priv->discstrat_mutex);
    490 
    491 		/* transform buffer to synchronous; XXX needed? */
    492 		b_callback = buf->b_iodone;
    493 		buf->b_iodone = NULL;
    494 		CLR(buf->b_flags, B_ASYNC);
    495 
    496 		/* issue and wait on completion */
    497 		udf_issue_buf(ump, priv->cur_queue, buf);
    498 		biowait(buf);
    499 
    500 		mutex_enter(&priv->discstrat_mutex);
    501 
    502 		/* if there is an error, repair this error, otherwise propagate */
    503 		if (buf->b_error && ((buf->b_flags & B_READ) == 0)) {
    504 			/* check what we need to do */
    505 			panic("UDF write error, can't handle yet!\n");
    506 		}
    507 
    508 		/* propagate result to higher layers */
    509 		if (b_callback) {
    510 			buf->b_iodone = b_callback;
    511 			(*buf->b_iodone)(buf);
    512 		}
    513 
    514 		return;
    515 	}
    516 
    517 	/* Check if we're idling in this state */
    518 	vfs_timestamp(&now);
    519 	last = &priv->last_queued[priv->cur_queue];
    520 	if (ump->discinfo.mmc_class == MMC_CLASS_CD) {
    521 		/* dont switch too fast for CD media; its expensive in time */
    522 		if (now.tv_sec - last->tv_sec < 3)
    523 			return;
    524 	}
    525 
    526 	/* check if we can/should switch */
    527 	new_queue = priv->cur_queue;
    528 
    529 	if (bufq_peek(priv->queues[UDF_SHED_READING]))
    530 		new_queue = UDF_SHED_READING;
    531 	if (bufq_peek(priv->queues[UDF_SHED_WRITING]))		/* only for unmount */
    532 		new_queue = UDF_SHED_WRITING;
    533 	if (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]))
    534 		new_queue = UDF_SHED_SEQWRITING;
    535 	if (priv->cur_queue == UDF_SHED_READING) {
    536 		if (new_queue == UDF_SHED_SEQWRITING) {
    537 			/* TODO use flag to signal if this is needed */
    538 			mutex_exit(&priv->discstrat_mutex);
    539 
    540 			/* update trackinfo for data and metadata */
    541 			error = udf_update_trackinfo(ump,
    542 					&ump->data_track);
    543 			assert(error == 0);
    544 			error = udf_update_trackinfo(ump,
    545 					&ump->metadata_track);
    546 			assert(error == 0);
    547 			mutex_enter(&priv->discstrat_mutex);
    548 			__USE(error);
    549 		}
    550 	}
    551 
    552 	if (new_queue != priv->cur_queue) {
    553 		DPRINTF(SHEDULE, ("switching from %d to %d\n",
    554 			priv->cur_queue, new_queue));
    555 		if (new_queue == UDF_SHED_READING)
    556 			udf_mmc_synchronise_caches(ump);
    557 	}
    558 
    559 	priv->cur_queue = new_queue;
    560 }
    561 
    562 
    563 static void
    564 udf_discstrat_thread(void *arg)
    565 {
    566 	struct udf_mount *ump = (struct udf_mount *) arg;
    567 	struct strat_private *priv = PRIV(ump);
    568 	int empty;
    569 
    570 	empty = 1;
    571 
    572 	priv->thread_running = 1;
    573 	cv_broadcast(&priv->discstrat_cv);
    574 
    575 	mutex_enter(&priv->discstrat_mutex);
    576 	while (priv->run_thread || !empty || priv->sync_req) {
    577 		/* process the current selected queue */
    578 		udf_doshedule(ump);
    579 		empty  = (bufq_peek(priv->queues[UDF_SHED_READING]) == NULL);
    580 		empty &= (bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL);
    581 		empty &= (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
    582 
    583 		/* wait for more if needed */
    584 		if (empty) {
    585 			if (priv->sync_req) {
    586 				/* on sync, we need to simulate a read->write transition */
    587 				udf_mmc_synchronise_caches(ump);
    588 				priv->cur_queue = UDF_SHED_READING;
    589 				priv->sync_req = 0;
    590 			}
    591 			cv_timedwait(&priv->discstrat_cv,
    592 				&priv->discstrat_mutex, hz/8);
    593 		}
    594 	}
    595 	mutex_exit(&priv->discstrat_mutex);
    596 
    597 	priv->thread_running  = 0;
    598 	priv->thread_finished = 1;
    599 	cv_broadcast(&priv->discstrat_cv);
    600 
    601 	kthread_exit(0);
    602 	/* not reached */
    603 }
    604 
    605 /* --------------------------------------------------------------------- */
    606 
    607 static void
    608 udf_discstrat_init_seq(struct udf_strat_args *args)
    609 {
    610 	struct udf_mount *ump = args->ump;
    611 	struct strat_private *priv = PRIV(ump);
    612 	struct disk_strategy dkstrat;
    613 	uint32_t lb_size;
    614 
    615 	KASSERT(ump);
    616 	KASSERT(ump->logical_vol);
    617 	KASSERT(priv == NULL);
    618 
    619 	lb_size = udf_rw32(ump->logical_vol->lb_size);
    620 	KASSERT(lb_size > 0);
    621 
    622 	/* initialise our memory space */
    623 	ump->strategy_private = malloc(sizeof(struct strat_private),
    624 		M_UDFTEMP, M_WAITOK);
    625 	priv = ump->strategy_private;
    626 	memset(priv, 0 , sizeof(struct strat_private));
    627 
    628 	/* initialise locks */
    629 	cv_init(&priv->discstrat_cv, "udfstrat");
    630 	mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE);
    631 
    632 	/*
    633 	 * Initialise pool for descriptors associated with nodes. This is done
    634 	 * in lb_size units though currently lb_size is dictated to be
    635 	 * sector_size.
    636 	 */
    637 	pool_init(&priv->desc_pool, lb_size, 0, 0, 0, "udf_desc_pool", NULL,
    638 	    IPL_NONE);
    639 
    640 	/*
    641 	 * remember old device strategy method and explicit set method
    642 	 * `discsort' since we have our own more complex strategy that is not
    643 	 * implementable on the CD device and other strategies will get in the
    644 	 * way.
    645 	 */
    646 	memset(&priv->old_strategy_setting, 0,
    647 		sizeof(struct disk_strategy));
    648 	VOP_IOCTL(ump->devvp, DIOCGSTRATEGY, &priv->old_strategy_setting,
    649 		FREAD | FKIOCTL, NOCRED);
    650 	memset(&dkstrat, 0, sizeof(struct disk_strategy));
    651 	strcpy(dkstrat.dks_name, "discsort");
    652 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &dkstrat, FWRITE | FKIOCTL,
    653 		NOCRED);
    654 
    655 	/* initialise our internal scheduler */
    656 	priv->cur_queue = UDF_SHED_READING;
    657 	bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
    658 		BUFQ_SORT_RAWBLOCK);
    659 	bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
    660 		BUFQ_SORT_RAWBLOCK);
    661 	bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "fcfs", 0);
    662 	vfs_timestamp(&priv->last_queued[UDF_SHED_READING]);
    663 	vfs_timestamp(&priv->last_queued[UDF_SHED_WRITING]);
    664 	vfs_timestamp(&priv->last_queued[UDF_SHED_SEQWRITING]);
    665 
    666 	/* create our disk strategy thread */
    667 	priv->thread_finished = 0;
    668 	priv->thread_running  = 0;
    669 	priv->run_thread      = 1;
    670 	priv->sync_req        = 0;
    671 	if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
    672 		udf_discstrat_thread, ump, &priv->queue_lwp,
    673 		"%s", "udf_rw")) {
    674 		panic("fork udf_rw");
    675 	}
    676 
    677 	/* wait for thread to spin up */
    678 	mutex_enter(&priv->discstrat_mutex);
    679 	while (!priv->thread_running) {
    680 		cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
    681 	}
    682 	mutex_exit(&priv->discstrat_mutex);
    683 }
    684 
    685 
    686 static void
    687 udf_discstrat_finish_seq(struct udf_strat_args *args)
    688 {
    689 	struct udf_mount *ump = args->ump;
    690 	struct strat_private *priv = PRIV(ump);
    691 
    692 	if (ump == NULL)
    693 		return;
    694 
    695 	/* stop our scheduling thread */
    696 	KASSERT(priv->run_thread == 1);
    697 	priv->run_thread = 0;
    698 
    699 	mutex_enter(&priv->discstrat_mutex);
    700 	while (!priv->thread_finished) {
    701 		cv_broadcast(&priv->discstrat_cv);
    702 		cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
    703 	}
    704 	mutex_exit(&priv->discstrat_mutex);
    705 
    706 	/* kthread should be finished now */
    707 
    708 	/* set back old device strategy method */
    709 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &priv->old_strategy_setting,
    710 			FWRITE, NOCRED);
    711 
    712 	/* destroy our pool */
    713 	pool_destroy(&priv->desc_pool);
    714 
    715 	mutex_destroy(&priv->discstrat_mutex);
    716 	cv_destroy(&priv->discstrat_cv);
    717 
    718 	/* free our private space */
    719 	free(ump->strategy_private, M_UDFTEMP);
    720 	ump->strategy_private = NULL;
    721 }
    722 
    723 /* --------------------------------------------------------------------- */
    724 
    725 struct udf_strategy udf_strat_sequential =
    726 {
    727 	udf_create_logvol_dscr_seq,
    728 	udf_free_logvol_dscr_seq,
    729 	udf_read_logvol_dscr_seq,
    730 	udf_write_logvol_dscr_seq,
    731 	udf_queuebuf_seq,
    732 	udf_sync_caches_seq,
    733 	udf_discstrat_init_seq,
    734 	udf_discstrat_finish_seq
    735 };
    736 
    737 
    738