Home | History | Annotate | Line # | Download | only in pgfs
pgfs_puffs.c revision 1.2
      1 /*	$NetBSD: pgfs_puffs.c,v 1.2 2012/04/11 14:25:54 yamt Exp $	*/
      2 
      3 /*-
      4  * Copyright (c)2010,2011 YAMAMOTO Takashi,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * puffs node ops and fs ops.
     31  */
     32 
     33 #include <sys/cdefs.h>
     34 #ifndef lint
     35 __RCSID("$NetBSD: pgfs_puffs.c,v 1.2 2012/04/11 14:25:54 yamt Exp $");
     36 #endif /* not lint */
     37 
     38 #include <assert.h>
     39 #include <err.h>
     40 #include <errno.h>
     41 #include <puffs.h>
     42 #include <inttypes.h>
     43 #include <stdarg.h>
     44 #include <stdbool.h>
     45 #include <stdio.h>
     46 #include <stdlib.h>
     47 #include <time.h>
     48 #include <util.h>
     49 
     50 #include <libpq-fe.h>
     51 #include <libpq/libpq-fs.h>	/* INV_* */
     52 
     53 #include "pgfs.h"
     54 #include "pgfs_db.h"
     55 #include "pgfs_subs.h"
     56 #include "pgfs_debug.h"
     57 
     58 static fileid_t
     59 cookie_to_fileid(puffs_cookie_t cookie)
     60 {
     61 
     62 	return (fileid_t)(uintptr_t)cookie;
     63 }
     64 
     65 static puffs_cookie_t
     66 fileid_to_cookie(fileid_t id)
     67 {
     68 	puffs_cookie_t cookie = (puffs_cookie_t)(uintptr_t)id;
     69 
     70 	/* XXX not true for 32-bit ports */
     71 	assert(cookie_to_fileid(cookie) == id);
     72 	return cookie;
     73 }
     74 
     75 puffs_cookie_t
     76 pgfs_root_cookie(void)
     77 {
     78 
     79 	return fileid_to_cookie(PGFS_ROOT_FILEID);
     80 }
     81 
     82 int
     83 pgfs_node_getattr(struct puffs_usermount *pu, puffs_cookie_t opc,
     84     struct vattr *va, const struct puffs_cred *pcr)
     85 {
     86 	struct Xconn *xc;
     87 	struct fileid_lock_handle *lock;
     88 	fileid_t fileid = cookie_to_fileid(opc);
     89 	int error;
     90 
     91 	DPRINTF("%llu\n", fileid);
     92 	lock = fileid_lock(fileid, puffs_cc_getcc(pu));
     93 retry:
     94 	xc = begin_readonly(pu);
     95 	error = getattr(xc, fileid, va, GETATTR_ALL);
     96 	if (error != 0) {
     97 		goto got_error;
     98 	}
     99 	error = commit(xc);
    100 	if (error != 0) {
    101 		goto got_error;
    102 	}
    103 	goto done;
    104 got_error:
    105 	rollback(xc);
    106 	if (error == EAGAIN) {
    107 		goto retry;
    108 	}
    109 done:
    110 	fileid_unlock(lock);
    111 	return error;
    112 }
    113 
    114 #define	PGFS_DIRCOOKIE_DOT	0	/* . entry */
    115 #define	PGFS_DIRCOOKIE_DOTDOT	1	/* .. entry */
    116 #define	PGFS_DIRCOOKIE_EOD	2	/* end of directory */
    117 
    118 int
    119 pgfs_node_readdir(struct puffs_usermount *pu, puffs_cookie_t opc,
    120     struct dirent *dent, off_t *readoff, size_t *reslen,
    121     const struct puffs_cred *pcr, int *eofflag, off_t *cookies,
    122     size_t *ncookies)
    123 {
    124 	fileid_t parent_fileid;
    125 	fileid_t child_fileid;
    126 	uint64_t cookie;
    127 	uint64_t nextcookie;
    128 	uint64_t offset;
    129 	struct Xconn *xc = NULL;
    130 	static const Oid types[] = {
    131 		TEXTOID,	/* name */
    132 		INT8OID,	/* cookie */
    133 		INT8OID,	/* nextcookie */
    134 		INT8OID,	/* child_fileid */
    135 	};
    136 	const char *name;
    137 	char *nametofree = NULL;
    138 	struct fetchstatus s;
    139 	int error;
    140 	bool fetching;
    141 	bool bufferfull;
    142 
    143 	parent_fileid = cookie_to_fileid(opc);
    144 	offset = *readoff;
    145 	DPRINTF("%llu %" PRIu64 "\n", parent_fileid, offset);
    146 	*ncookies = 0;
    147 	fetching = false;
    148 next:
    149 	if (offset == PGFS_DIRCOOKIE_DOT) {
    150 		name = ".";
    151 		child_fileid = parent_fileid;
    152 		cookie = offset;
    153 		nextcookie = PGFS_DIRCOOKIE_DOTDOT;
    154 		goto store_and_next;
    155 	}
    156 	if (offset == PGFS_DIRCOOKIE_DOTDOT) {
    157 		if (parent_fileid != PGFS_ROOT_FILEID) {
    158 			if (xc == NULL) {
    159 				xc = begin(pu);
    160 			}
    161 			error = lookupp(xc, parent_fileid, &child_fileid);
    162 			if (error != 0) {
    163 				rollback(xc);
    164 				return error;
    165 			}
    166 		} else {
    167 			child_fileid = parent_fileid;
    168 		}
    169 		name = "..";
    170 		cookie = offset;
    171 		nextcookie = PGFS_DIRCOOKIE_EOD + 1;
    172 		goto store_and_next;
    173 	}
    174 	if (offset == PGFS_DIRCOOKIE_EOD) {
    175 		*eofflag = 1;
    176 		goto done;
    177 	}
    178 	/* offset > PGFS_DIRCOOKIE_EOD; normal entries */
    179 	if (xc == NULL) {
    180 		xc = begin(pu);
    181 	}
    182 	if (!fetching) {
    183 		static struct cmd *c;
    184 
    185 		/*
    186 		 * a simpler query like "ORDER BY name OFFSET :offset - 3"
    187 		 * would work well for most of cases.  however, it doesn't for
    188 		 * applications which expect readdir cookies are kept valid
    189 		 * even after unlink of other entries in the directory.
    190 		 * eg. cvs, bonnie++
    191 		 *
    192 		 * 2::int8 == PGFS_DIRCOOKIE_EOD
    193 		 */
    194 		CREATECMD(c,
    195 			"SELECT name, cookie, "
    196 			"lead(cookie, 1, 2::int8) OVER (ORDER BY cookie), "
    197 			"child_fileid "
    198 			"FROM dirent "
    199 			"WHERE parent_fileid = $1 "
    200 			"AND cookie >= $2 "
    201 			"ORDER BY cookie", INT8OID, INT8OID);
    202 		error = sendcmd(xc, c, parent_fileid, offset);
    203 		if (error != 0) {
    204 			rollback(xc);
    205 			return error;
    206 		}
    207 		fetching = true;
    208 		fetchinit(&s, xc);
    209 	}
    210 	/*
    211 	 * fetch and process an entry
    212 	 */
    213 	error = FETCHNEXT(&s, types, &nametofree, &cookie, &nextcookie,
    214 	    &child_fileid);
    215 	if (error == ENOENT) {
    216 		DPRINTF("ENOENT\n");
    217 		if (offset == PGFS_DIRCOOKIE_EOD + 1) {
    218 			DPRINTF("empty directory\n");
    219 			*eofflag = 1;
    220 			goto done;
    221 		}
    222 		fetchdone(&s);
    223 		rollback(xc);
    224 		return EINVAL;
    225 	}
    226 	if (error != 0) {
    227 		DPRINTF("error %d\n", error);
    228 		fetchdone(&s);
    229 		rollback(xc);
    230 		return error;
    231 	}
    232 	if (offset != cookie && offset != PGFS_DIRCOOKIE_EOD + 1) {
    233 		free(nametofree);
    234 		fetchdone(&s);
    235 		rollback(xc);
    236 		return EINVAL;
    237 	}
    238 	name = nametofree;
    239 store_and_next:
    240 	/*
    241 	 * store an entry and continue processing unless the result buffer
    242 	 * is full.
    243 	 */
    244 	bufferfull = !puffs_nextdent(&dent, name, child_fileid, DT_UNKNOWN,
    245 	    reslen);
    246 	free(nametofree);
    247 	nametofree = NULL;
    248 	if (bufferfull) {
    249 		*eofflag = 0;
    250 		goto done;
    251 	}
    252 	PUFFS_STORE_DCOOKIE(cookies, ncookies, cookie);
    253 	offset = nextcookie;
    254 	*readoff = offset;
    255 	goto next;
    256 done:
    257 	/*
    258 	 * cleanup and update atime of the directory.
    259 	 */
    260 	assert(nametofree == NULL);
    261 	if (fetching) {
    262 		fetchdone(&s);
    263 		fetching = false;
    264 	}
    265 	if (xc == NULL) {
    266 retry:
    267 		xc = begin(pu);
    268 	}
    269 	error = update_atime(xc, parent_fileid);
    270 	if (error != 0) {
    271 		goto got_error;
    272 	}
    273 	error = commit(xc);
    274 	if (error != 0) {
    275 		goto got_error;
    276 	}
    277 	return 0;
    278 got_error:
    279 	rollback(xc);
    280 	if (error == EAGAIN) {
    281 		goto retry;
    282 	}
    283 	return error;
    284 }
    285 
    286 int
    287 pgfs_node_lookup(struct puffs_usermount *pu, puffs_cookie_t opc,
    288     struct puffs_newinfo *pni, const struct puffs_cn *pcn)
    289 {
    290 	struct vattr dva;
    291 	struct vattr cva;
    292 	struct puffs_cred * const pcr = pcn->pcn_cred;
    293 	fileid_t parent_fileid;
    294 	const char *name;
    295 	fileid_t child_fileid;
    296 	struct Xconn *xc;
    297 	mode_t access_mode;
    298 	int error;
    299 	int saved_error;
    300 
    301 	parent_fileid = cookie_to_fileid(opc);
    302 	name = pcn->pcn_name;
    303 	DPRINTF("%llu %s\n", parent_fileid, name);
    304 	assert(strcmp(name, ".")); /* . is handled by framework */
    305 retry:
    306 	xc = begin_readonly(pu);
    307 	error = getattr(xc, parent_fileid, &dva,
    308 	    GETATTR_TYPE|GETATTR_MODE|GETATTR_UID|GETATTR_GID);
    309 	if (error != 0) {
    310 		goto got_error;
    311 	}
    312 	access_mode = PUFFS_VEXEC;
    313 	if ((pcn->pcn_flags & NAMEI_ISLASTCN) != 0 &&
    314 	    pcn->pcn_nameiop != NAMEI_LOOKUP) {
    315 		access_mode |= PUFFS_VWRITE;
    316 	}
    317 	error = puffs_access(dva.va_type, dva.va_mode, dva.va_uid, dva.va_gid,
    318 	    access_mode, pcr);
    319 	if (error != 0) {
    320 		goto commit_and_return;
    321 	}
    322 	if (!strcmp(name, "..")) {
    323 		error = lookupp(xc, parent_fileid, &child_fileid);
    324 		if (error != 0) {
    325 			goto got_error;
    326 		}
    327 	} else {
    328 		static struct cmd *c;
    329 		static const Oid types[] = { INT8OID, };
    330 		struct fetchstatus s;
    331 
    332 		CREATECMD(c, "SELECT child_fileid "
    333 			"FROM dirent "
    334 			"WHERE parent_fileid = $1 AND name = $2;",
    335 			INT8OID, TEXTOID);
    336 		error = sendcmd(xc, c, parent_fileid, name);
    337 		if (error != 0) {
    338 			DPRINTF("sendcmd %d\n", error);
    339 			goto got_error;
    340 		}
    341 		fetchinit(&s, xc);
    342 		error = FETCHNEXT(&s, types, &child_fileid);
    343 		fetchdone(&s);
    344 		if (error == ENOENT) {
    345 			goto commit_and_return;
    346 		}
    347 		if (error != 0) {
    348 			goto got_error;
    349 		}
    350 	}
    351 	error = getattr(xc, child_fileid, &cva, GETATTR_TYPE|GETATTR_SIZE);
    352 	if (error != 0) {
    353 		goto got_error;
    354 	}
    355 	error = commit(xc);
    356 	if (error != 0) {
    357 		goto got_error;
    358 	}
    359 	puffs_newinfo_setcookie(pni, fileid_to_cookie(child_fileid));
    360 	puffs_newinfo_setvtype(pni, cva.va_type);
    361 	puffs_newinfo_setsize(pni, cva.va_size);
    362 	return 0;
    363 got_error:
    364 	rollback(xc);
    365 	if (error == EAGAIN) {
    366 		goto retry;
    367 	}
    368 	return error;
    369 commit_and_return:
    370 	saved_error = error;
    371 	error = commit(xc);
    372 	if (error != 0) {
    373 		goto got_error;
    374 	}
    375 	return saved_error;
    376 }
    377 
    378 int
    379 pgfs_node_mkdir(struct puffs_usermount *pu, puffs_cookie_t opc,
    380     struct puffs_newinfo *pni, const struct puffs_cn *pcn,
    381     const struct vattr *va)
    382 {
    383 	struct Xconn *xc;
    384 	fileid_t parent_fileid = cookie_to_fileid(opc);
    385 	fileid_t new_fileid;
    386 	struct puffs_cred * const pcr = pcn->pcn_cred;
    387 	uid_t uid;
    388 	gid_t gid;
    389 	int error;
    390 
    391 	DPRINTF("%llu %s\n", parent_fileid, pcn->pcn_name);
    392 	if (puffs_cred_getuid(pcr, &uid) == -1 ||
    393 	    puffs_cred_getgid(pcr, &gid) == -1) {
    394 		return errno;
    395 	}
    396 retry:
    397 	xc = begin(pu);
    398 	error = mklinkfile(xc, parent_fileid, pcn->pcn_name, VDIR,
    399 	    va->va_mode, uid, gid, &new_fileid);
    400 	if (error == 0) {
    401 		error = update_nlink(xc, parent_fileid, 1);
    402 	}
    403 	if (error != 0) {
    404 		goto got_error;
    405 	}
    406 	error = commit(xc);
    407 	if (error != 0) {
    408 		goto got_error;
    409 	}
    410 	puffs_newinfo_setcookie(pni, fileid_to_cookie(new_fileid));
    411 	return 0;
    412 got_error:
    413 	rollback(xc);
    414 	if (error == EAGAIN) {
    415 		goto retry;
    416 	}
    417 	return error;
    418 }
    419 
    420 int
    421 pgfs_node_create(struct puffs_usermount *pu, puffs_cookie_t opc,
    422     struct puffs_newinfo *pni, const struct puffs_cn *pcn,
    423     const struct vattr *va)
    424 {
    425 	struct Xconn *xc;
    426 	fileid_t parent_fileid = cookie_to_fileid(opc);
    427 	fileid_t new_fileid;
    428 	struct puffs_cred * const pcr = pcn->pcn_cred;
    429 	uid_t uid;
    430 	gid_t gid;
    431 	int error;
    432 
    433 	DPRINTF("%llu %s\n", parent_fileid, pcn->pcn_name);
    434 	if (puffs_cred_getuid(pcr, &uid) == -1 ||
    435 	    puffs_cred_getgid(pcr, &gid) == -1) {
    436 		return errno;
    437 	}
    438 retry:
    439 	xc = begin(pu);
    440 	error = mklinkfile_lo(xc, parent_fileid, pcn->pcn_name, VREG,
    441 	    va->va_mode,
    442 	    uid, gid, &new_fileid, NULL);
    443 	if (error != 0) {
    444 		goto got_error;
    445 	}
    446 	error = commit(xc);
    447 	if (error != 0) {
    448 		goto got_error;
    449 	}
    450 	puffs_newinfo_setcookie(pni, fileid_to_cookie(new_fileid));
    451 	return 0;
    452 got_error:
    453 	rollback(xc);
    454 	if (error == EAGAIN) {
    455 		goto retry;
    456 	}
    457 	return error;
    458 }
    459 
    460 int
    461 pgfs_node_write(struct puffs_usermount *pu, puffs_cookie_t opc,
    462     uint8_t *buf, off_t offset, size_t *resid,
    463     const struct puffs_cred *pcr, int ioflags)
    464 {
    465 	struct Xconn *xc;
    466 	struct fileid_lock_handle *lock;
    467 	fileid_t fileid = cookie_to_fileid(opc);
    468 	size_t resultlen;
    469 	int fd;
    470 	int error;
    471 
    472 	if ((ioflags & PUFFS_IO_APPEND) != 0) {
    473 		DPRINTF("%llu append sz %zu\n", fileid, *resid);
    474 	} else {
    475 		DPRINTF("%llu off %" PRIu64 " sz %zu\n", fileid,
    476 		    (uint64_t)offset, *resid);
    477 	}
    478 	lock = fileid_lock(fileid, puffs_cc_getcc(pu));
    479 retry:
    480 	xc = begin(pu);
    481 	error = update_mctime(xc, fileid);
    482 	if (error != 0) {
    483 		goto got_error;
    484 	}
    485 	error = lo_open_by_fileid(xc, fileid, INV_WRITE, &fd);
    486 	if (error != 0) {
    487 		goto got_error;
    488 	}
    489 	if ((ioflags & PUFFS_IO_APPEND) != 0) {
    490 		int32_t off;
    491 
    492 		error = my_lo_lseek(xc, fd, 0, SEEK_END, &off);
    493 		if (error != 0) {
    494 			goto got_error;
    495 		}
    496 		offset = off;
    497 	}
    498 	if (offset < 0) {			/* negative offset */
    499 		error = EINVAL;
    500 		goto got_error;
    501 	}
    502 	if ((uint64_t)(INT64_MAX - offset) < *resid ||	/* int64 overflow */
    503 	    INT_MAX < offset + *resid) {	/* our max filesize */
    504 		error = EFBIG;
    505 		goto got_error;
    506 	}
    507 	if ((ioflags & PUFFS_IO_APPEND) == 0) {
    508 		error = my_lo_lseek(xc, fd, offset, SEEK_SET, NULL);
    509 		if (error != 0) {
    510 			goto got_error;
    511 		}
    512 	}
    513 	error = my_lo_write(xc, fd, (const char *)buf, *resid, &resultlen);
    514 	if (error != 0) {
    515 		goto got_error;
    516 	}
    517 	assert(*resid >= resultlen);
    518 	error = commit(xc);
    519 	if (error != 0) {
    520 		goto got_error;
    521 	}
    522 	*resid -= resultlen;
    523 	DPRINTF("resid %zu\n", *resid);
    524 	goto done;
    525 got_error:
    526 	rollback(xc);
    527 	if (error == EAGAIN) {
    528 		goto retry;
    529 	}
    530 done:
    531 	fileid_unlock(lock);
    532 	return error;
    533 }
    534 
    535 int
    536 pgfs_node_read(struct puffs_usermount *pu, puffs_cookie_t opc,
    537     uint8_t *buf, off_t offset, size_t *resid,
    538     const struct puffs_cred *pcr, int ioflags)
    539 {
    540 	struct Xconn *xc;
    541 	fileid_t fileid = cookie_to_fileid(opc);
    542 	size_t resultlen;
    543 	int fd;
    544 	int error;
    545 
    546 	DPRINTF("%llu off %" PRIu64 " sz %zu\n",
    547 	    fileid, (uint64_t)offset, *resid);
    548 retry:
    549 	xc = begin(pu);
    550 	/*
    551 	 * try to update atime first as it's prune to conflict with other
    552 	 * transactions.  eg. read-ahead requests can conflict each other.
    553 	 * we don't want to retry my_lo_read as it's expensive.
    554 	 *
    555 	 * XXX probably worth to implement noatime mount option.
    556 	 */
    557 	error = update_atime(xc, fileid);
    558 	if (error != 0) {
    559 		goto got_error;
    560 	}
    561 	error = lo_open_by_fileid(xc, fileid, INV_READ, &fd);
    562 	if (error != 0) {
    563 		goto got_error;
    564 	}
    565 	error = my_lo_lseek(xc, fd, offset, SEEK_SET, NULL);
    566 	if (error != 0) {
    567 		goto got_error;
    568 	}
    569 	error = my_lo_read(xc, fd, buf, *resid, &resultlen);
    570 	if (error != 0) {
    571 		goto got_error;
    572 	}
    573 	assert(*resid >= resultlen);
    574 	error = commit(xc);
    575 	if (error != 0) {
    576 		goto got_error;
    577 	}
    578 	*resid -= resultlen;
    579 	return 0;
    580 got_error:
    581 	rollback(xc);
    582 	if (error == EAGAIN) {
    583 		goto retry;
    584 	}
    585 	return error;
    586 }
    587 
    588 int
    589 pgfs_node_link(struct puffs_usermount *pu, puffs_cookie_t dir_opc,
    590     puffs_cookie_t targ_opc, const struct puffs_cn *pcn)
    591 {
    592 	struct Xconn *xc;
    593 	fileid_t dir_fileid = cookie_to_fileid(dir_opc);
    594 	fileid_t targ_fileid = cookie_to_fileid(targ_opc);
    595 	struct vattr va;
    596 	int error;
    597 
    598 	DPRINTF("%llu %llu %s\n", dir_fileid, targ_fileid, pcn->pcn_name);
    599 retry:
    600 	xc = begin(pu);
    601 	error = getattr(xc, targ_fileid, &va, GETATTR_TYPE);
    602 	if (error != 0) {
    603 		goto got_error;
    604 	}
    605 	if (va.va_type == VDIR) {
    606 		error = EPERM;
    607 		goto got_error;
    608 	}
    609 	error = linkfile(xc, dir_fileid, pcn->pcn_name, targ_fileid);
    610 	if (error != 0) {
    611 		goto got_error;
    612 	}
    613 	error = update_ctime(xc, targ_fileid);
    614 	if (error != 0) {
    615 		goto got_error;
    616 	}
    617 	error = commit(xc);
    618 	if (error != 0) {
    619 		goto got_error;
    620 	}
    621 	return 0;
    622 got_error:
    623 	rollback(xc);
    624 	if (error == EAGAIN) {
    625 		goto retry;
    626 	}
    627 	return error;
    628 }
    629 
    630 int
    631 pgfs_node_remove(struct puffs_usermount *pu, puffs_cookie_t opc,
    632     puffs_cookie_t targ, const struct puffs_cn *pcn)
    633 {
    634 	struct Xconn *xc;
    635 	fileid_t fileid = cookie_to_fileid(opc);
    636 	fileid_t targ_fileid = cookie_to_fileid(targ);
    637 	struct vattr va;
    638 	int error;
    639 
    640 retry:
    641 	xc = begin(pu);
    642 	error = getattr(xc, targ_fileid, &va, GETATTR_TYPE);
    643 	if (error != 0) {
    644 		goto got_error;
    645 	}
    646 	if (va.va_type == VDIR) {
    647 		error = EPERM;
    648 		goto got_error;
    649 	}
    650 	error = unlinkfile(xc, fileid, pcn->pcn_name, targ_fileid);
    651 	if (error != 0) {
    652 		goto got_error;
    653 	}
    654 	error = commit(xc);
    655 	if (error != 0) {
    656 		goto got_error;
    657 	}
    658 	puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_INACT_N2);
    659 	return 0;
    660 got_error:
    661 	rollback(xc);
    662 	if (error == EAGAIN) {
    663 		goto retry;
    664 	}
    665 	return error;
    666 }
    667 
    668 int
    669 pgfs_node_rmdir(struct puffs_usermount *pu, puffs_cookie_t opc,
    670     puffs_cookie_t targ, const struct puffs_cn *pcn)
    671 {
    672 	struct Xconn *xc;
    673 	fileid_t parent_fileid = cookie_to_fileid(opc);
    674 	fileid_t targ_fileid = cookie_to_fileid(targ);
    675 	struct vattr va;
    676 	bool empty;
    677 	int error;
    678 
    679 retry:
    680 	xc = begin(pu);
    681 	error = getattr(xc, targ_fileid, &va, GETATTR_TYPE);
    682 	if (error != 0) {
    683 		goto got_error;
    684 	}
    685 	if (va.va_type != VDIR) {
    686 		error = ENOTDIR;
    687 		goto got_error;
    688 	}
    689 	error = isempty(xc, targ_fileid, &empty);
    690 	if (error != 0) {
    691 		goto got_error;
    692 	}
    693 	if (!empty) {
    694 		error = ENOTEMPTY;
    695 		goto got_error;
    696 	}
    697 	error = unlinkfile(xc, parent_fileid, pcn->pcn_name, targ_fileid);
    698 	if (error == 0) {
    699 		error = update_nlink(xc, parent_fileid, -1);
    700 	}
    701 	if (error != 0) {
    702 		goto got_error;
    703 	}
    704 	error = commit(xc);
    705 	if (error != 0) {
    706 		goto got_error;
    707 	}
    708 	puffs_setback(puffs_cc_getcc(pu), PUFFS_SETBACK_INACT_N2);
    709 	return 0;
    710 got_error:
    711 	rollback(xc);
    712 	if (error == EAGAIN) {
    713 		goto retry;
    714 	}
    715 	return error;
    716 }
    717 
    718 int
    719 pgfs_node_inactive(struct puffs_usermount *pu, puffs_cookie_t opc)
    720 {
    721 	struct Xconn *xc;
    722 	fileid_t fileid = cookie_to_fileid(opc);
    723 	struct vattr va;
    724 	int error;
    725 
    726 	/*
    727 	 * XXX
    728 	 * probably this should be handed to the separate "reaper" context
    729 	 * because lo_unlink() can be too expensive to execute synchronously.
    730 	 * however, the puffs_cc API doesn't provide a way to create a worker
    731 	 * context.
    732 	 */
    733 
    734 	DPRINTF("%llu\n", fileid);
    735 retry:
    736 	xc = begin(pu);
    737 	error = getattr(xc, fileid, &va, GETATTR_NLINK|GETATTR_TYPE);
    738 	if (error != 0) {
    739 		DPRINTF("%llu GETATTR fail\n", fileid);
    740 		goto got_error;
    741 	}
    742 	if (va.va_nlink == 0) {
    743 		DPRINTF("%llu nlink=0\n", fileid);
    744 		error = cleanupfile(xc, fileid, &va);
    745 		if (error != 0) {
    746 			goto got_error;
    747 		}
    748 	}
    749 	error = commit(xc);
    750 	if (error != 0) {
    751 		goto got_error;
    752 	}
    753 	return 0;
    754 got_error:
    755 	rollback(xc);
    756 	if (error == EAGAIN) {
    757 		goto retry;
    758 	}
    759 	return error;
    760 }
    761 
    762 int
    763 pgfs_node_setattr(struct puffs_usermount *pu, puffs_cookie_t opc,
    764     const struct vattr *va, const struct puffs_cred *pcr)
    765 {
    766 	struct Xconn *xc;
    767 	struct fileid_lock_handle *lock;
    768 	fileid_t fileid = cookie_to_fileid(opc);
    769 	struct vattr ova;
    770 	unsigned int attrs;
    771 	int error;
    772 
    773 	DPRINTF("%llu\n", fileid);
    774 	if (va->va_flags != (u_long)PUFFS_VNOVAL) {
    775 		return EOPNOTSUPP;
    776 	}
    777 	attrs = 0;
    778 	if (va->va_uid != (uid_t)PUFFS_VNOVAL ||
    779 	    va->va_gid != (gid_t)PUFFS_VNOVAL) {
    780 		attrs |= GETATTR_UID|GETATTR_GID|GETATTR_MODE;
    781 	}
    782 	if (va->va_mode != (mode_t)PUFFS_VNOVAL) {
    783 		attrs |= GETATTR_TYPE|GETATTR_UID|GETATTR_GID;
    784 	}
    785 	if (va->va_atime.tv_sec != PUFFS_VNOVAL ||
    786 	    va->va_mtime.tv_sec != PUFFS_VNOVAL ||
    787 	    va->va_ctime.tv_sec != PUFFS_VNOVAL) {
    788 		attrs |= GETATTR_UID|GETATTR_GID|GETATTR_MODE;
    789 	}
    790 	lock = fileid_lock(fileid, puffs_cc_getcc(pu));
    791 retry:
    792 	xc = begin(pu);
    793 	error = getattr(xc, fileid, &ova, attrs);
    794 	if (error != 0) {
    795 		goto got_error;
    796 	}
    797 	if (va->va_uid != (uid_t)PUFFS_VNOVAL ||
    798 	    va->va_gid != (gid_t)PUFFS_VNOVAL) {
    799 		static struct cmd *c;
    800 		uint64_t newuid =
    801 		    va->va_uid != (uid_t)PUFFS_VNOVAL ? va->va_uid : ova.va_uid;
    802 		uint64_t newgid =
    803 		    va->va_gid != (gid_t)PUFFS_VNOVAL ? va->va_gid : ova.va_gid;
    804 
    805 		error = puffs_access_chown(ova.va_uid, ova.va_gid,
    806 		    newuid, newgid, pcr);
    807 		if (error != 0) {
    808 			goto got_error;
    809 		}
    810 		CREATECMD(c,
    811 			"UPDATE file "
    812 			"SET uid = $1, gid = $2 "
    813 			"WHERE fileid = $3", INT8OID, INT8OID, INT8OID);
    814 		error = simplecmd(xc, c, newuid, newgid, fileid);
    815 		if (error != 0) {
    816 			goto got_error;
    817 		}
    818 		ova.va_uid = newuid;
    819 		ova.va_gid = newgid;
    820 	}
    821 	if (va->va_mode != (mode_t)PUFFS_VNOVAL) {
    822 		static struct cmd *c;
    823 		uint64_t newmode = va->va_mode;
    824 
    825 		error = puffs_access_chmod(ova.va_uid, ova.va_gid, ova.va_type,
    826 		    newmode, pcr);
    827 		if (error != 0) {
    828 			goto got_error;
    829 		}
    830 		CREATECMD(c,
    831 			"UPDATE file "
    832 			"SET mode = $1 "
    833 			"WHERE fileid = $2", INT8OID, INT8OID);
    834 		error = simplecmd(xc, c, newmode, fileid);
    835 		if (error != 0) {
    836 			goto got_error;
    837 		}
    838 		ova.va_mode = newmode;
    839 	}
    840 	if (va->va_atime.tv_sec != PUFFS_VNOVAL ||
    841 	    va->va_mtime.tv_sec != PUFFS_VNOVAL ||
    842 	    va->va_ctime.tv_sec != PUFFS_VNOVAL ||
    843 	    va->va_birthtime.tv_sec != PUFFS_VNOVAL) {
    844 		error = puffs_access_times(ova.va_uid, ova.va_gid, ova.va_mode,
    845 		    (va->va_vaflags & VA_UTIMES_NULL) != 0, pcr);
    846 		if (error != 0) {
    847 			goto got_error;
    848 		}
    849 		if (va->va_atime.tv_sec != PUFFS_VNOVAL) {
    850 			static struct cmd *c;
    851 			char *ts;
    852 
    853 			error = timespec_to_pgtimestamp(&va->va_atime, &ts);
    854 			if (error != 0) {
    855 				goto got_error;
    856 			}
    857 			CREATECMD(c,
    858 				"UPDATE file "
    859 				"SET atime = $1 "
    860 				"WHERE fileid = $2", TIMESTAMPTZOID, INT8OID);
    861 			error = simplecmd(xc, c, ts, fileid);
    862 			free(ts);
    863 			if (error != 0) {
    864 				goto got_error;
    865 			}
    866 		}
    867 		if (va->va_mtime.tv_sec != PUFFS_VNOVAL) {
    868 			static struct cmd *c;
    869 			char *ts;
    870 
    871 			error = timespec_to_pgtimestamp(&va->va_mtime, &ts);
    872 			if (error != 0) {
    873 				goto got_error;
    874 			}
    875 			CREATECMD(c,
    876 				"UPDATE file "
    877 				"SET mtime = $1 "
    878 				"WHERE fileid = $2", TIMESTAMPTZOID, INT8OID);
    879 			error = simplecmd(xc, c, ts, fileid);
    880 			free(ts);
    881 			if (error != 0) {
    882 				goto got_error;
    883 			}
    884 		}
    885 		if (va->va_ctime.tv_sec != PUFFS_VNOVAL) {
    886 			static struct cmd *c;
    887 			char *ts;
    888 
    889 			error = timespec_to_pgtimestamp(&va->va_ctime, &ts);
    890 			if (error != 0) {
    891 				goto got_error;
    892 			}
    893 			CREATECMD(c,
    894 				"UPDATE file "
    895 				"SET ctime = $1 "
    896 				"WHERE fileid = $2", TIMESTAMPTZOID, INT8OID);
    897 			error = simplecmd(xc, c, ts, fileid);
    898 			free(ts);
    899 			if (error != 0) {
    900 				goto got_error;
    901 			}
    902 		}
    903 		if (va->va_birthtime.tv_sec != PUFFS_VNOVAL) {
    904 			static struct cmd *c;
    905 			char *ts;
    906 
    907 			error = timespec_to_pgtimestamp(&va->va_birthtime, &ts);
    908 			if (error != 0) {
    909 				goto got_error;
    910 			}
    911 			CREATECMD(c,
    912 				"UPDATE file "
    913 				"SET btime = $1 "
    914 				"WHERE fileid = $2", TIMESTAMPTZOID, INT8OID);
    915 			error = simplecmd(xc, c, ts, fileid);
    916 			free(ts);
    917 			if (error != 0) {
    918 				goto got_error;
    919 			}
    920 		}
    921 	}
    922 	if (va->va_size != (uint64_t)PUFFS_VNOVAL) {
    923 		int fd;
    924 
    925 		if (va->va_size > INT_MAX) {
    926 			error = EFBIG;
    927 			goto got_error;
    928 		}
    929 		error = lo_open_by_fileid(xc, fileid, INV_READ|INV_WRITE, &fd);
    930 		if (error != 0) {
    931 			goto got_error;
    932 		}
    933 		error = my_lo_truncate(xc, fd, va->va_size);
    934 		if (error != 0) {
    935 			goto got_error;
    936 		}
    937 		error = my_lo_close(xc, fd);
    938 		if (error != 0) {
    939 			goto got_error;
    940 		}
    941 	}
    942 	error = commit(xc);
    943 	if (error != 0) {
    944 		goto got_error;
    945 	}
    946 	goto done;
    947 got_error:
    948 	rollback(xc);
    949 	if (error == EAGAIN) {
    950 		goto retry;
    951 	}
    952 done:
    953 	fileid_unlock(lock);
    954 	return error;
    955 }
    956 
    957 int
    958 pgfs_node_rename(struct puffs_usermount *pu, puffs_cookie_t src_dir,
    959     puffs_cookie_t src, const struct puffs_cn *pcn_src,
    960     puffs_cookie_t targ_dir, puffs_cookie_t targ,
    961     const struct puffs_cn *pcn_targ)
    962 {
    963 	struct Xconn *xc;
    964 	fileid_t fileid_src_dir = cookie_to_fileid(src_dir);
    965 	fileid_t fileid_src = cookie_to_fileid(src);
    966 	fileid_t fileid_targ_dir = cookie_to_fileid(targ_dir);
    967 	fileid_t fileid_targ = cookie_to_fileid(targ);
    968 	struct vattr va_src;
    969 	struct vattr va_targ;
    970 	int error;
    971 
    972 	DPRINTF("%llu %llu %llu %llu\n", fileid_src_dir, fileid_src,
    973 	    fileid_targ_dir, fileid_targ);
    974 retry:
    975 	xc = begin(pu);
    976 	error = getattr(xc, fileid_src, &va_src, GETATTR_TYPE);
    977 	if (error != 0) {
    978 		goto got_error;
    979 	}
    980 	if (va_src.va_type == VDIR) {
    981 		error = check_path(xc, fileid_src, fileid_targ_dir);
    982 		if (error != 0) {
    983 			goto got_error;
    984 		}
    985 	}
    986 	if (fileid_targ != 0) {
    987 		error = getattr(xc, fileid_targ, &va_targ,
    988 		    GETATTR_TYPE|GETATTR_NLINK);
    989 		if (error != 0) {
    990 			goto got_error;
    991 		}
    992 		if (va_src.va_type == VDIR) {
    993 			if (va_targ.va_type != VDIR) {
    994 				error = ENOTDIR;
    995 				goto got_error;
    996 			}
    997 			if (va_targ.va_nlink != 2) {
    998 				error = ENOTEMPTY;
    999 				goto got_error;
   1000 			}
   1001 		} else if (va_targ.va_type == VDIR) {
   1002 			error = EISDIR;
   1003 			goto got_error;
   1004 		}
   1005 		error = unlinkfile(xc, fileid_targ_dir, pcn_targ->pcn_name,
   1006 		    fileid_targ);
   1007 		if (error == 0 && va_targ.va_type == VDIR) {
   1008 			error = update_nlink(xc, fileid_targ_dir, -1);
   1009 		}
   1010 		if (error != 0) {
   1011 			goto got_error;
   1012 		}
   1013 	}
   1014 	error = linkfile(xc, fileid_targ_dir, pcn_targ->pcn_name, fileid_src);
   1015 	if (error == 0 && va_src.va_type == VDIR) {
   1016 		error = update_nlink(xc, fileid_targ_dir, 1);
   1017 	}
   1018 	if (error != 0) {
   1019 		goto got_error;
   1020 	}
   1021 	/* XXX ctime? */
   1022 	error = unlinkfile(xc, fileid_src_dir, pcn_src->pcn_name, fileid_src);
   1023 	if (error == 0 && va_src.va_type == VDIR) {
   1024 		error = update_nlink(xc, fileid_src_dir, -1);
   1025 	}
   1026 	if (error != 0) {
   1027 		goto got_error;
   1028 	}
   1029 	error = commit(xc);
   1030 	if (error != 0) {
   1031 		goto got_error;
   1032 	}
   1033 	return 0;
   1034 got_error:
   1035 	rollback(xc);
   1036 	if (error == EAGAIN) {
   1037 		goto retry;
   1038 	}
   1039 	return error;
   1040 }
   1041 
   1042 int
   1043 pgfs_node_symlink(struct puffs_usermount *pu, puffs_cookie_t opc,
   1044     struct puffs_newinfo *pni, const struct puffs_cn *pcn,
   1045     const struct vattr *va, const char *target)
   1046 {
   1047 	struct Xconn *xc;
   1048 	struct puffs_cred *pcr = pcn->pcn_cred;
   1049 	fileid_t parent_fileid = cookie_to_fileid(opc);
   1050 	fileid_t new_fileid;
   1051 	size_t resultlen;
   1052 	size_t targetlen;
   1053 	uid_t uid;
   1054 	gid_t gid;
   1055 	int loid;
   1056 	int fd;
   1057 	int error;
   1058 
   1059 	DPRINTF("%llu %s %s\n", parent_fileid, pcn->pcn_name, target);
   1060 	if (puffs_cred_getuid(pcr, &uid) == -1 ||
   1061 	    puffs_cred_getgid(pcr, &gid) == -1) {
   1062 		return errno;
   1063 	}
   1064 retry:
   1065 	xc = begin(pu);
   1066 	error = mklinkfile_lo(xc, parent_fileid, pcn->pcn_name, VLNK,
   1067 	    va->va_mode, uid, gid, &new_fileid, &loid);
   1068 	if (error != 0) {
   1069 		goto got_error;
   1070 	}
   1071 	error = my_lo_open(xc, loid, INV_WRITE, &fd);
   1072 	if (error != 0) {
   1073 		goto got_error;
   1074 	}
   1075 	targetlen = strlen(target);
   1076 	error = my_lo_write(xc, fd, target, targetlen, &resultlen);
   1077 	if (error != 0) {
   1078 		goto got_error;
   1079 	}
   1080 	if (resultlen != targetlen) {
   1081 		error = ENOSPC; /* XXX */
   1082 		goto got_error;
   1083 	}
   1084 	error = commit(xc);
   1085 	if (error != 0) {
   1086 		goto got_error;
   1087 	}
   1088 	puffs_newinfo_setcookie(pni, fileid_to_cookie(new_fileid));
   1089 	return 0;
   1090 got_error:
   1091 	rollback(xc);
   1092 	if (error == EAGAIN) {
   1093 		goto retry;
   1094 	}
   1095 	return error;
   1096 }
   1097 
   1098 int
   1099 pgfs_node_readlink(struct puffs_usermount *pu, puffs_cookie_t opc,
   1100     const struct puffs_cred *pcr, char *buf, size_t *buflenp)
   1101 {
   1102 	fileid_t fileid = cookie_to_fileid(opc);
   1103 	struct Xconn *xc;
   1104 	size_t resultlen;
   1105 	int fd;
   1106 	int error;
   1107 
   1108 	DPRINTF("%llu\n", fileid);
   1109 	xc = begin_readonly(pu);
   1110 	error = lo_open_by_fileid(xc, fileid, INV_READ, &fd);
   1111 	if (error != 0) {
   1112 		rollback(xc);
   1113 		return error;
   1114 	}
   1115 	error = my_lo_read(xc, fd, buf, *buflenp, &resultlen);
   1116 	if (error != 0) {
   1117 		rollback(xc);
   1118 		return error;
   1119 	}
   1120 	assert(resultlen <= *buflenp);
   1121 	error = commit(xc);
   1122 	if (error != 0) {
   1123 		return error;
   1124 	}
   1125 	*buflenp = resultlen;
   1126 	return 0;
   1127 }
   1128 
   1129 int
   1130 pgfs_node_access(struct puffs_usermount *pu, puffs_cookie_t opc,
   1131     int mode, const struct puffs_cred *pcr)
   1132 {
   1133 	struct Xconn *xc;
   1134 	fileid_t fileid = cookie_to_fileid(opc);
   1135 	struct vattr va;
   1136 	int error;
   1137 
   1138 	DPRINTF("%llu\n", fileid);
   1139 retry:
   1140 	xc = begin_readonly(pu);
   1141 	error = getattr(xc, fileid, &va,
   1142 	    GETATTR_TYPE|GETATTR_MODE|GETATTR_UID|GETATTR_GID);
   1143 	if (error != 0) {
   1144 		goto got_error;
   1145 	}
   1146 	error = commit(xc);
   1147 	if (error != 0) {
   1148 		goto got_error;
   1149 	}
   1150 	return puffs_access(va.va_type, va.va_mode, va.va_uid, va.va_gid, mode,
   1151 	    pcr);
   1152 got_error:
   1153 	rollback(xc);
   1154 	if (error == EAGAIN) {
   1155 		goto retry;
   1156 	}
   1157 	return error;
   1158 }
   1159 
   1160 int
   1161 pgfs_node_fsync(struct puffs_usermount *pu, puffs_cookie_t opc,
   1162     const struct puffs_cred *pcr, int flags, off_t offlo, off_t offhi)
   1163 {
   1164 	fileid_t fileid = cookie_to_fileid(opc);
   1165 
   1166 	DPRINTF("%llu\n", fileid);
   1167 	return flush_xacts(pu);
   1168 }
   1169 
   1170 int
   1171 pgfs_fs_statvfs(struct puffs_usermount *pu, struct statvfs *sbp)
   1172 {
   1173 	struct Xconn *xc;
   1174 	uint64_t nfiles;
   1175 	uint64_t bytes;
   1176 	uint64_t lo_bytes;
   1177 	static struct cmd *c_nfiles;
   1178 	static struct cmd *c_bytes;
   1179 	static struct cmd *c_lobytes;
   1180 	static const Oid types[] = { INT8OID, };
   1181 	struct fetchstatus s;
   1182 	int error;
   1183 
   1184 retry:
   1185 	xc = begin_readonly(pu);
   1186 	/*
   1187 	 * use an estimate which we can retrieve quickly, instead of
   1188 	 * "SELECT count(*) from file".
   1189 	 */
   1190 	CREATECMD_NOPARAM(c_nfiles,
   1191 		"SELECT reltuples::int8 "
   1192 		"FROM pg_class c LEFT JOIN pg_namespace n "
   1193 		"ON (n.oid=c.relnamespace) "
   1194 		"WHERE n.nspname = 'pgfs' AND c.relname = 'file'");
   1195 	CREATECMD_NOPARAM(c_bytes,
   1196 		"SELECT sum(pg_total_relation_size(c.oid))::int8 "
   1197 		"FROM pg_class c LEFT JOIN pg_namespace n "
   1198 		"ON (n.oid=c.relnamespace) "
   1199 		"WHERE n.nspname = 'pgfs'");
   1200 	/*
   1201 	 * the following is not correct if someone else is using large objects
   1202 	 * in the same database.  we don't bother to join with datafork it as
   1203 	 * it's too expensive for the little benefit.
   1204 	 */
   1205 	CREATECMD_NOPARAM(c_lobytes,
   1206 		"SELECT pg_total_relation_size('pg_largeobject')::int8");
   1207 	error = sendcmd(xc, c_nfiles);
   1208 	if (error != 0) {
   1209 		goto got_error;
   1210 	}
   1211 	fetchinit(&s, xc);
   1212 	error = FETCHNEXT(&s, types, &nfiles);
   1213 	fetchdone(&s);
   1214 	if (error != 0) {
   1215 		goto got_error;
   1216 	}
   1217 	error = sendcmd(xc, c_bytes);
   1218 	if (error != 0) {
   1219 		goto got_error;
   1220 	}
   1221 	fetchinit(&s, xc);
   1222 	error = FETCHNEXT(&s, types, &bytes);
   1223 	fetchdone(&s);
   1224 	if (error != 0) {
   1225 		goto got_error;
   1226 	}
   1227 	error = sendcmd(xc, c_lobytes);
   1228 	if (error != 0) {
   1229 		goto got_error;
   1230 	}
   1231 	fetchinit(&s, xc);
   1232 	error = FETCHNEXT(&s, types, &lo_bytes);
   1233 	fetchdone(&s);
   1234 	if (error != 0) {
   1235 		goto got_error;
   1236 	}
   1237 	error = commit(xc);
   1238 	if (error != 0) {
   1239 		goto got_error;
   1240 	}
   1241 	/*
   1242 	 * XXX fill f_blocks and f_files with meaningless large values.
   1243 	 * there are no easy way to provide meaningful values for them
   1244 	 * esp. with tablespaces.
   1245 	 */
   1246 	sbp->f_bsize = LOBLKSIZE;
   1247 	sbp->f_frsize = LOBLKSIZE;
   1248 	sbp->f_blocks = INT64_MAX / 100 / sbp->f_frsize;
   1249 	sbp->f_bfree = sbp->f_blocks - howmany(bytes + lo_bytes, sbp->f_frsize);
   1250 	sbp->f_bavail = sbp->f_bfree;
   1251 	sbp->f_bresvd = 0;
   1252 	sbp->f_files = INT_MAX;
   1253 	sbp->f_ffree = sbp->f_files - nfiles;
   1254 	sbp->f_favail = sbp->f_ffree;
   1255 	sbp->f_fresvd = 0;
   1256 	return 0;
   1257 got_error:
   1258 	rollback(xc);
   1259 	if (error == EAGAIN) {
   1260 		goto retry;
   1261 	}
   1262 	return error;
   1263 }
   1264