Home | History | Annotate | Line # | Download | only in btree
bt_seq.c revision 1.10
      1 /*	$NetBSD: bt_seq.c,v 1.10 1997/07/21 14:06:37 jtc Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1990, 1993, 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley by
      8  * Mike Olson.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the University of
     21  *	California, Berkeley and its contributors.
     22  * 4. Neither the name of the University nor the names of its contributors
     23  *    may be used to endorse or promote products derived from this software
     24  *    without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     36  * SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 #if defined(LIBC_SCCS) && !defined(lint)
     41 #if 0
     42 static char sccsid[] = "@(#)bt_seq.c	8.7 (Berkeley) 7/20/94";
     43 #else
     44 __RCSID("$NetBSD: bt_seq.c,v 1.10 1997/07/21 14:06:37 jtc Exp $");
     45 #endif
     46 #endif /* LIBC_SCCS and not lint */
     47 
     48 #include "namespace.h"
     49 #include <sys/types.h>
     50 
     51 #include <errno.h>
     52 #include <stddef.h>
     53 #include <stdio.h>
     54 #include <stdlib.h>
     55 
     56 #include <db.h>
     57 #include "btree.h"
     58 
     59 static int __bt_first __P((BTREE *, const DBT *, EPG *, int *));
     60 static int __bt_seqadv __P((BTREE *, EPG *, int));
     61 static int __bt_seqset __P((BTREE *, EPG *, DBT *, int));
     62 
     63 /*
     64  * Sequential scan support.
     65  *
     66  * The tree can be scanned sequentially, starting from either end of the
     67  * tree or from any specific key.  A scan request before any scanning is
     68  * done is initialized as starting from the least node.
     69  */
     70 
     71 /*
     72  * __bt_seq --
     73  *	Btree sequential scan interface.
     74  *
     75  * Parameters:
     76  *	dbp:	pointer to access method
     77  *	key:	key for positioning and return value
     78  *	data:	data return value
     79  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
     80  *
     81  * Returns:
     82  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
     83  */
     84 int
     85 __bt_seq(dbp, key, data, flags)
     86 	const DB *dbp;
     87 	DBT *key, *data;
     88 	u_int flags;
     89 {
     90 	BTREE *t;
     91 	EPG e;
     92 	int status;
     93 
     94 	t = dbp->internal;
     95 
     96 	/* Toss any page pinned across calls. */
     97 	if (t->bt_pinned != NULL) {
     98 		mpool_put(t->bt_mp, t->bt_pinned, 0);
     99 		t->bt_pinned = NULL;
    100 	}
    101 
    102 	/*
    103 	 * If scan unitialized as yet, or starting at a specific record, set
    104 	 * the scan to a specific key.  Both __bt_seqset and __bt_seqadv pin
    105 	 * the page the cursor references if they're successful.
    106 	 */
    107 	switch (flags) {
    108 	case R_NEXT:
    109 	case R_PREV:
    110 		if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
    111 			status = __bt_seqadv(t, &e, flags);
    112 			break;
    113 		}
    114 		/* FALLTHROUGH */
    115 	case R_FIRST:
    116 	case R_LAST:
    117 	case R_CURSOR:
    118 		status = __bt_seqset(t, &e, key, flags);
    119 		break;
    120 	default:
    121 		errno = EINVAL;
    122 		return (RET_ERROR);
    123 	}
    124 
    125 	if (status == RET_SUCCESS) {
    126 		__bt_setcur(t, e.page->pgno, e.index);
    127 
    128 		status =
    129 		    __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);
    130 
    131 		/*
    132 		 * If the user is doing concurrent access, we copied the
    133 		 * key/data, toss the page.
    134 		 */
    135 		if (F_ISSET(t, B_DB_LOCK))
    136 			mpool_put(t->bt_mp, e.page, 0);
    137 		else
    138 			t->bt_pinned = e.page;
    139 	}
    140 	return (status);
    141 }
    142 
    143 /*
    144  * __bt_seqset --
    145  *	Set the sequential scan to a specific key.
    146  *
    147  * Parameters:
    148  *	t:	tree
    149  *	ep:	storage for returned key
    150  *	key:	key for initial scan position
    151  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
    152  *
    153  * Side effects:
    154  *	Pins the page the cursor references.
    155  *
    156  * Returns:
    157  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
    158  */
    159 static int
    160 __bt_seqset(t, ep, key, flags)
    161 	BTREE *t;
    162 	EPG *ep;
    163 	DBT *key;
    164 	int flags;
    165 {
    166 	PAGE *h;
    167 	pgno_t pg;
    168 	int exact;
    169 
    170 	/*
    171 	 * Find the first, last or specific key in the tree and point the
    172 	 * cursor at it.  The cursor may not be moved until a new key has
    173 	 * been found.
    174 	 */
    175 	switch (flags) {
    176 	case R_CURSOR:				/* Keyed scan. */
    177 		/*
    178 		 * Find the first instance of the key or the smallest key
    179 		 * which is greater than or equal to the specified key.
    180 		 */
    181 		if (key->data == NULL || key->size == 0) {
    182 			errno = EINVAL;
    183 			return (RET_ERROR);
    184 		}
    185 		return (__bt_first(t, key, ep, &exact));
    186 	case R_FIRST:				/* First record. */
    187 	case R_NEXT:
    188 		/* Walk down the left-hand side of the tree. */
    189 		for (pg = P_ROOT;;) {
    190 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    191 				return (RET_ERROR);
    192 
    193 			/* Check for an empty tree. */
    194 			if (NEXTINDEX(h) == 0) {
    195 				mpool_put(t->bt_mp, h, 0);
    196 				return (RET_SPECIAL);
    197 			}
    198 
    199 			if (h->flags & (P_BLEAF | P_RLEAF))
    200 				break;
    201 			pg = GETBINTERNAL(h, 0)->pgno;
    202 			mpool_put(t->bt_mp, h, 0);
    203 		}
    204 		ep->page = h;
    205 		ep->index = 0;
    206 		break;
    207 	case R_LAST:				/* Last record. */
    208 	case R_PREV:
    209 		/* Walk down the right-hand side of the tree. */
    210 		for (pg = P_ROOT;;) {
    211 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    212 				return (RET_ERROR);
    213 
    214 			/* Check for an empty tree. */
    215 			if (NEXTINDEX(h) == 0) {
    216 				mpool_put(t->bt_mp, h, 0);
    217 				return (RET_SPECIAL);
    218 			}
    219 
    220 			if (h->flags & (P_BLEAF | P_RLEAF))
    221 				break;
    222 			pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
    223 			mpool_put(t->bt_mp, h, 0);
    224 		}
    225 
    226 		ep->page = h;
    227 		ep->index = NEXTINDEX(h) - 1;
    228 		break;
    229 	}
    230 	return (RET_SUCCESS);
    231 }
    232 
    233 /*
    234  * __bt_seqadvance --
    235  *	Advance the sequential scan.
    236  *
    237  * Parameters:
    238  *	t:	tree
    239  *	flags:	R_NEXT, R_PREV
    240  *
    241  * Side effects:
    242  *	Pins the page the new key/data record is on.
    243  *
    244  * Returns:
    245  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
    246  */
    247 static int
    248 __bt_seqadv(t, ep, flags)
    249 	BTREE *t;
    250 	EPG *ep;
    251 	int flags;
    252 {
    253 	CURSOR *c;
    254 	PAGE *h;
    255 	indx_t index = 0;	/* pacify gcc */
    256 	pgno_t pg;
    257 	int exact;
    258 
    259 	/*
    260 	 * There are a couple of states that we can be in.  The cursor has
    261 	 * been initialized by the time we get here, but that's all we know.
    262 	 */
    263 	c = &t->bt_cursor;
    264 
    265 	/*
    266 	 * The cursor was deleted where there weren't any duplicate records,
    267 	 * so the key was saved.  Find out where that key would go in the
    268 	 * current tree.  It doesn't matter if the returned key is an exact
    269 	 * match or not -- if it's an exact match, the record was added after
    270 	 * the delete so we can just return it.  If not, as long as there's
    271 	 * a record there, return it.
    272 	 */
    273 	if (F_ISSET(c, CURS_ACQUIRE))
    274 		return (__bt_first(t, &c->key, ep, &exact));
    275 
    276 	/* Get the page referenced by the cursor. */
    277 	if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
    278 		return (RET_ERROR);
    279 
    280 	/*
    281  	 * Find the next/previous record in the tree and point the cursor at
    282 	 * it.  The cursor may not be moved until a new key has been found.
    283 	 */
    284 	switch (flags) {
    285 	case R_NEXT:			/* Next record. */
    286 		/*
    287 		 * The cursor was deleted in duplicate records, and moved
    288 		 * forward to a record that has yet to be returned.  Clear
    289 		 * that flag, and return the record.
    290 		 */
    291 		if (F_ISSET(c, CURS_AFTER))
    292 			goto usecurrent;
    293 		index = c->pg.index;
    294 		if (++index == NEXTINDEX(h)) {
    295 			pg = h->nextpg;
    296 			mpool_put(t->bt_mp, h, 0);
    297 			if (pg == P_INVALID)
    298 				return (RET_SPECIAL);
    299 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    300 				return (RET_ERROR);
    301 			index = 0;
    302 		}
    303 		break;
    304 	case R_PREV:			/* Previous record. */
    305 		/*
    306 		 * The cursor was deleted in duplicate records, and moved
    307 		 * backward to a record that has yet to be returned.  Clear
    308 		 * that flag, and return the record.
    309 		 */
    310 		if (F_ISSET(c, CURS_BEFORE)) {
    311 usecurrent:		F_CLR(c, CURS_AFTER | CURS_BEFORE);
    312 			ep->page = h;
    313 			ep->index = c->pg.index;
    314 			return (RET_SUCCESS);
    315 		}
    316 		index = c->pg.index;
    317 		if (index == 0) {
    318 			pg = h->prevpg;
    319 			mpool_put(t->bt_mp, h, 0);
    320 			if (pg == P_INVALID)
    321 				return (RET_SPECIAL);
    322 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    323 				return (RET_ERROR);
    324 			index = NEXTINDEX(h) - 1;
    325 		} else
    326 			--index;
    327 		break;
    328 	}
    329 
    330 	ep->page = h;
    331 	ep->index = index;
    332 	return (RET_SUCCESS);
    333 }
    334 
    335 /*
    336  * __bt_first --
    337  *	Find the first entry.
    338  *
    339  * Parameters:
    340  *	t:	the tree
    341  *    key:	the key
    342  *  erval:	return EPG
    343  * exactp:	pointer to exact match flag
    344  *
    345  * Returns:
    346  *	The first entry in the tree greater than or equal to key,
    347  *	or RET_SPECIAL if no such key exists.
    348  */
    349 static int
    350 __bt_first(t, key, erval, exactp)
    351 	BTREE *t;
    352 	const DBT *key;
    353 	EPG *erval;
    354 	int *exactp;
    355 {
    356 	PAGE *h;
    357 	EPG *ep, save;
    358 	pgno_t pg;
    359 
    360 	/*
    361 	 * Find any matching record; __bt_search pins the page.
    362 	 *
    363 	 * If it's an exact match and duplicates are possible, walk backwards
    364 	 * in the tree until we find the first one.  Otherwise, make sure it's
    365 	 * a valid key (__bt_search may return an index just past the end of a
    366 	 * page) and return it.
    367 	 */
    368 	if ((ep = __bt_search(t, key, exactp)) == NULL)
    369 		return (0);
    370 	if (*exactp) {
    371 		if (F_ISSET(t, B_NODUPS)) {
    372 			*erval = *ep;
    373 			return (RET_SUCCESS);
    374 		}
    375 
    376 		/*
    377 		 * Walk backwards, as long as the entry matches and there are
    378 		 * keys left in the tree.  Save a copy of each match in case
    379 		 * we go too far.
    380 		 */
    381 		save = *ep;
    382 		h = ep->page;
    383 		do {
    384 			if (save.page->pgno != ep->page->pgno) {
    385 				mpool_put(t->bt_mp, save.page, 0);
    386 				save = *ep;
    387 			} else
    388 				save.index = ep->index;
    389 
    390 			/*
    391 			 * Don't unpin the page the last (or original) match
    392 			 * was on, but make sure it's unpinned if an error
    393 			 * occurs.
    394 			 */
    395 			if (ep->index == 0) {
    396 				if (h->prevpg == P_INVALID)
    397 					break;
    398 				if (h->pgno != save.page->pgno)
    399 					mpool_put(t->bt_mp, h, 0);
    400 				if ((h = mpool_get(t->bt_mp,
    401 				    h->prevpg, 0)) == NULL) {
    402 					if (h->pgno == save.page->pgno)
    403 						mpool_put(t->bt_mp,
    404 						    save.page, 0);
    405 					return (RET_ERROR);
    406 				}
    407 				ep->page = h;
    408 				ep->index = NEXTINDEX(h);
    409 			}
    410 			--ep->index;
    411 		} while (__bt_cmp(t, key, ep) == 0);
    412 
    413 		/*
    414 		 * Reach here with the last page that was looked at pinned,
    415 		 * which may or may not be the same as the last (or original)
    416 		 * match page.  If it's not useful, release it.
    417 		 */
    418 		if (h->pgno != save.page->pgno)
    419 			mpool_put(t->bt_mp, h, 0);
    420 
    421 		*erval = save;
    422 		return (RET_SUCCESS);
    423 	}
    424 
    425 	/* If at the end of a page, find the next entry. */
    426 	if (ep->index == NEXTINDEX(ep->page)) {
    427 		h = ep->page;
    428 		pg = h->nextpg;
    429 		mpool_put(t->bt_mp, h, 0);
    430 		if (pg == P_INVALID)
    431 			return (RET_SPECIAL);
    432 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    433 			return (RET_ERROR);
    434 		ep->index = 0;
    435 		ep->page = h;
    436 	}
    437 	*erval = *ep;
    438 	return (RET_SUCCESS);
    439 }
    440 
    441 /*
    442  * __bt_setcur --
    443  *	Set the cursor to an entry in the tree.
    444  *
    445  * Parameters:
    446  *	t:	the tree
    447  *   pgno:	page number
    448  *  index:	page index
    449  */
    450 void
    451 __bt_setcur(t, pgno, index)
    452 	BTREE *t;
    453 	pgno_t pgno;
    454 	u_int index;
    455 {
    456 	/* Lose any already deleted key. */
    457 	if (t->bt_cursor.key.data != NULL) {
    458 		free(t->bt_cursor.key.data);
    459 		t->bt_cursor.key.size = 0;
    460 		t->bt_cursor.key.data = NULL;
    461 	}
    462 	F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);
    463 
    464 	/* Update the cursor. */
    465 	t->bt_cursor.pg.pgno = pgno;
    466 	t->bt_cursor.pg.index = index;
    467 	F_SET(&t->bt_cursor, CURS_INIT);
    468 }
    469