Home | History | Annotate | Line # | Download | only in btree
bt_seq.c revision 1.1.1.1
      1      1.1  cgd /*-
      2      1.1  cgd  * Copyright (c) 1990, 1993
      3      1.1  cgd  *	The Regents of the University of California.  All rights reserved.
      4      1.1  cgd  *
      5      1.1  cgd  * This code is derived from software contributed to Berkeley by
      6      1.1  cgd  * Mike Olson.
      7      1.1  cgd  *
      8      1.1  cgd  * Redistribution and use in source and binary forms, with or without
      9      1.1  cgd  * modification, are permitted provided that the following conditions
     10      1.1  cgd  * are met:
     11      1.1  cgd  * 1. Redistributions of source code must retain the above copyright
     12      1.1  cgd  *    notice, this list of conditions and the following disclaimer.
     13      1.1  cgd  * 2. Redistributions in binary form must reproduce the above copyright
     14      1.1  cgd  *    notice, this list of conditions and the following disclaimer in the
     15      1.1  cgd  *    documentation and/or other materials provided with the distribution.
     16      1.1  cgd  * 3. All advertising materials mentioning features or use of this software
     17      1.1  cgd  *    must display the following acknowledgement:
     18      1.1  cgd  *	This product includes software developed by the University of
     19      1.1  cgd  *	California, Berkeley and its contributors.
     20      1.1  cgd  * 4. Neither the name of the University nor the names of its contributors
     21      1.1  cgd  *    may be used to endorse or promote products derived from this software
     22      1.1  cgd  *    without specific prior written permission.
     23      1.1  cgd  *
     24      1.1  cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25      1.1  cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26      1.1  cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27      1.1  cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28      1.1  cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29      1.1  cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30      1.1  cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31      1.1  cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32      1.1  cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33      1.1  cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34      1.1  cgd  * SUCH DAMAGE.
     35      1.1  cgd  */
     36      1.1  cgd 
     37      1.1  cgd #if defined(LIBC_SCCS) && !defined(lint)
     38  1.1.1.1  cgd static char sccsid[] = "@(#)bt_seq.c	8.2 (Berkeley) 9/7/93";
     39      1.1  cgd #endif /* LIBC_SCCS and not lint */
     40      1.1  cgd 
     41      1.1  cgd #include <sys/types.h>
     42      1.1  cgd 
     43      1.1  cgd #include <errno.h>
     44      1.1  cgd #include <stddef.h>
     45      1.1  cgd #include <stdio.h>
     46      1.1  cgd #include <stdlib.h>
     47      1.1  cgd 
     48      1.1  cgd #include <db.h>
     49      1.1  cgd #include "btree.h"
     50      1.1  cgd 
     51      1.1  cgd static int	 bt_seqadv __P((BTREE *, EPG *, int));
     52      1.1  cgd static int	 bt_seqset __P((BTREE *, EPG *, DBT *, int));
     53      1.1  cgd 
     54      1.1  cgd /*
     55      1.1  cgd  * Sequential scan support.
     56      1.1  cgd  *
     57      1.1  cgd  * The tree can be scanned sequentially, starting from either end of the tree
     58      1.1  cgd  * or from any specific key.  A scan request before any scanning is done is
     59      1.1  cgd  * initialized as starting from the least node.
     60      1.1  cgd  *
     61      1.1  cgd  * Each tree has an EPGNO which has the current position of the cursor.  The
     62      1.1  cgd  * cursor has to survive deletions/insertions in the tree without losing its
     63      1.1  cgd  * position.  This is done by noting deletions without doing them, and then
     64      1.1  cgd  * doing them when the cursor moves (or the tree is closed).
     65      1.1  cgd  */
     66      1.1  cgd 
     67      1.1  cgd /*
     68      1.1  cgd  * __BT_SEQ -- Btree sequential scan interface.
     69      1.1  cgd  *
     70      1.1  cgd  * Parameters:
     71      1.1  cgd  *	dbp:	pointer to access method
     72      1.1  cgd  *	key:	key for positioning and return value
     73      1.1  cgd  *	data:	data return value
     74      1.1  cgd  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
     75      1.1  cgd  *
     76      1.1  cgd  * Returns:
     77      1.1  cgd  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
     78      1.1  cgd  */
     79      1.1  cgd int
     80      1.1  cgd __bt_seq(dbp, key, data, flags)
     81      1.1  cgd 	const DB *dbp;
     82      1.1  cgd 	DBT *key, *data;
     83      1.1  cgd 	u_int flags;
     84      1.1  cgd {
     85      1.1  cgd 	BTREE *t;
     86      1.1  cgd 	EPG e;
     87      1.1  cgd 	int status;
     88      1.1  cgd 
     89  1.1.1.1  cgd 	t = dbp->internal;
     90  1.1.1.1  cgd 
     91  1.1.1.1  cgd 	/* Toss any page pinned across calls. */
     92  1.1.1.1  cgd 	if (t->bt_pinned != NULL) {
     93  1.1.1.1  cgd 		mpool_put(t->bt_mp, t->bt_pinned, 0);
     94  1.1.1.1  cgd 		t->bt_pinned = NULL;
     95  1.1.1.1  cgd 	}
     96  1.1.1.1  cgd 
     97      1.1  cgd 	/*
     98      1.1  cgd 	 * If scan unitialized as yet, or starting at a specific record, set
     99      1.1  cgd 	 * the scan to a specific key.  Both bt_seqset and bt_seqadv pin the
    100      1.1  cgd 	 * page the cursor references if they're successful.
    101      1.1  cgd 	 */
    102      1.1  cgd 	switch(flags) {
    103      1.1  cgd 	case R_NEXT:
    104      1.1  cgd 	case R_PREV:
    105      1.1  cgd 		if (ISSET(t, B_SEQINIT)) {
    106      1.1  cgd 			status = bt_seqadv(t, &e, flags);
    107      1.1  cgd 			break;
    108      1.1  cgd 		}
    109      1.1  cgd 		/* FALLTHROUGH */
    110      1.1  cgd 	case R_CURSOR:
    111      1.1  cgd 	case R_FIRST:
    112      1.1  cgd 	case R_LAST:
    113      1.1  cgd 		status = bt_seqset(t, &e, key, flags);
    114      1.1  cgd 		break;
    115      1.1  cgd 	default:
    116      1.1  cgd 		errno = EINVAL;
    117      1.1  cgd 		return (RET_ERROR);
    118      1.1  cgd 	}
    119      1.1  cgd 
    120      1.1  cgd 	if (status == RET_SUCCESS) {
    121      1.1  cgd 		status = __bt_ret(t, &e, key, data);
    122      1.1  cgd 
    123      1.1  cgd 		/* Update the actual cursor. */
    124      1.1  cgd 		t->bt_bcursor.pgno = e.page->pgno;
    125      1.1  cgd 		t->bt_bcursor.index = e.index;
    126  1.1.1.1  cgd 
    127  1.1.1.1  cgd 		/*
    128  1.1.1.1  cgd 		 * If the user is doing concurrent access, we copied the
    129  1.1.1.1  cgd 		 * key/data, toss the page.
    130  1.1.1.1  cgd 		 */
    131  1.1.1.1  cgd 		if (ISSET(t, B_DB_LOCK))
    132  1.1.1.1  cgd 			mpool_put(t->bt_mp, e.page, 0);
    133  1.1.1.1  cgd 		else
    134  1.1.1.1  cgd 			t->bt_pinned = e.page;
    135      1.1  cgd 		SET(t, B_SEQINIT);
    136      1.1  cgd 	}
    137      1.1  cgd 	return (status);
    138      1.1  cgd }
    139      1.1  cgd 
    140      1.1  cgd /*
    141      1.1  cgd  * BT_SEQSET -- Set the sequential scan to a specific key.
    142      1.1  cgd  *
    143      1.1  cgd  * Parameters:
    144      1.1  cgd  *	t:	tree
    145      1.1  cgd  *	ep:	storage for returned key
    146      1.1  cgd  *	key:	key for initial scan position
    147      1.1  cgd  *	flags:	R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
    148      1.1  cgd  *
    149      1.1  cgd  * Side effects:
    150      1.1  cgd  *	Pins the page the cursor references.
    151      1.1  cgd  *
    152      1.1  cgd  * Returns:
    153      1.1  cgd  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
    154      1.1  cgd  */
    155      1.1  cgd static int
    156      1.1  cgd bt_seqset(t, ep, key, flags)
    157      1.1  cgd 	BTREE *t;
    158      1.1  cgd 	EPG *ep;
    159      1.1  cgd 	DBT *key;
    160      1.1  cgd 	int flags;
    161      1.1  cgd {
    162      1.1  cgd 	EPG *e;
    163      1.1  cgd 	PAGE *h;
    164      1.1  cgd 	pgno_t pg;
    165      1.1  cgd 	int exact;
    166      1.1  cgd 
    167      1.1  cgd 	/*
    168      1.1  cgd 	 * Delete any already deleted record that we've been saving because
    169      1.1  cgd 	 * the cursor pointed to it.  Since going to a specific key, should
    170      1.1  cgd 	 * delete any logically deleted records so they aren't found.
    171      1.1  cgd 	 */
    172      1.1  cgd 	if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor))
    173      1.1  cgd 		return (RET_ERROR);
    174      1.1  cgd 
    175      1.1  cgd 	/*
    176      1.1  cgd 	 * Find the first, last or specific key in the tree and point the cursor
    177      1.1  cgd 	 * at it.  The cursor may not be moved until a new key has been found.
    178      1.1  cgd 	 */
    179      1.1  cgd 	switch(flags) {
    180      1.1  cgd 	case R_CURSOR:				/* Keyed scan. */
    181      1.1  cgd 		/*
    182      1.1  cgd 		 * Find the first instance of the key or the smallest key which
    183      1.1  cgd 		 * is greater than or equal to the specified key.  If run out
    184      1.1  cgd 		 * of keys, return RET_SPECIAL.
    185      1.1  cgd 		 */
    186      1.1  cgd 		if (key->data == NULL || key->size == 0) {
    187      1.1  cgd 			errno = EINVAL;
    188      1.1  cgd 			return (RET_ERROR);
    189      1.1  cgd 		}
    190      1.1  cgd 		e = __bt_first(t, key, &exact);	/* Returns pinned page. */
    191      1.1  cgd 		if (e == NULL)
    192      1.1  cgd 			return (RET_ERROR);
    193      1.1  cgd 		/*
    194      1.1  cgd 		 * If at the end of a page, skip any empty pages and find the
    195      1.1  cgd 		 * next entry.
    196      1.1  cgd 		 */
    197      1.1  cgd 		if (e->index == NEXTINDEX(e->page)) {
    198      1.1  cgd 			h = e->page;
    199      1.1  cgd 			do {
    200      1.1  cgd 				pg = h->nextpg;
    201      1.1  cgd 				mpool_put(t->bt_mp, h, 0);
    202      1.1  cgd 				if (pg == P_INVALID)
    203      1.1  cgd 					return (RET_SPECIAL);
    204      1.1  cgd 				if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    205      1.1  cgd 					return (RET_ERROR);
    206      1.1  cgd 			} while (NEXTINDEX(h) == 0);
    207      1.1  cgd 			e->index = 0;
    208      1.1  cgd 			e->page = h;
    209      1.1  cgd 		}
    210      1.1  cgd 		*ep = *e;
    211      1.1  cgd 		break;
    212      1.1  cgd 	case R_FIRST:				/* First record. */
    213      1.1  cgd 	case R_NEXT:
    214      1.1  cgd 		/* Walk down the left-hand side of the tree. */
    215      1.1  cgd 		for (pg = P_ROOT;;) {
    216      1.1  cgd 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    217      1.1  cgd 				return (RET_ERROR);
    218      1.1  cgd 			if (h->flags & (P_BLEAF | P_RLEAF))
    219      1.1  cgd 				break;
    220      1.1  cgd 			pg = GETBINTERNAL(h, 0)->pgno;
    221      1.1  cgd 			mpool_put(t->bt_mp, h, 0);
    222      1.1  cgd 		}
    223      1.1  cgd 
    224      1.1  cgd 		/* Skip any empty pages. */
    225      1.1  cgd 		while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) {
    226      1.1  cgd 			pg = h->nextpg;
    227      1.1  cgd 			mpool_put(t->bt_mp, h, 0);
    228      1.1  cgd 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    229      1.1  cgd 				return (RET_ERROR);
    230      1.1  cgd 		}
    231      1.1  cgd 
    232      1.1  cgd 		if (NEXTINDEX(h) == 0) {
    233      1.1  cgd 			mpool_put(t->bt_mp, h, 0);
    234      1.1  cgd 			return (RET_SPECIAL);
    235      1.1  cgd 		}
    236      1.1  cgd 
    237      1.1  cgd 		ep->page = h;
    238      1.1  cgd 		ep->index = 0;
    239      1.1  cgd 		break;
    240      1.1  cgd 	case R_LAST:				/* Last record. */
    241      1.1  cgd 	case R_PREV:
    242      1.1  cgd 		/* Walk down the right-hand side of the tree. */
    243      1.1  cgd 		for (pg = P_ROOT;;) {
    244      1.1  cgd 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    245      1.1  cgd 				return (RET_ERROR);
    246      1.1  cgd 			if (h->flags & (P_BLEAF | P_RLEAF))
    247      1.1  cgd 				break;
    248      1.1  cgd 			pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
    249      1.1  cgd 			mpool_put(t->bt_mp, h, 0);
    250      1.1  cgd 		}
    251      1.1  cgd 
    252      1.1  cgd 		/* Skip any empty pages. */
    253      1.1  cgd 		while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) {
    254      1.1  cgd 			pg = h->prevpg;
    255      1.1  cgd 			mpool_put(t->bt_mp, h, 0);
    256      1.1  cgd 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    257      1.1  cgd 				return (RET_ERROR);
    258      1.1  cgd 		}
    259      1.1  cgd 
    260      1.1  cgd 		if (NEXTINDEX(h) == 0) {
    261      1.1  cgd 			mpool_put(t->bt_mp, h, 0);
    262      1.1  cgd 			return (RET_SPECIAL);
    263      1.1  cgd 		}
    264      1.1  cgd 
    265      1.1  cgd 		ep->page = h;
    266      1.1  cgd 		ep->index = NEXTINDEX(h) - 1;
    267      1.1  cgd 		break;
    268      1.1  cgd 	}
    269      1.1  cgd 	return (RET_SUCCESS);
    270      1.1  cgd }
    271      1.1  cgd 
    272      1.1  cgd /*
    273      1.1  cgd  * BT_SEQADVANCE -- Advance the sequential scan.
    274      1.1  cgd  *
    275      1.1  cgd  * Parameters:
    276      1.1  cgd  *	t:	tree
    277      1.1  cgd  *	flags:	R_NEXT, R_PREV
    278      1.1  cgd  *
    279      1.1  cgd  * Side effects:
    280      1.1  cgd  *	Pins the page the new key/data record is on.
    281      1.1  cgd  *
    282      1.1  cgd  * Returns:
    283      1.1  cgd  *	RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
    284      1.1  cgd  */
    285      1.1  cgd static int
    286      1.1  cgd bt_seqadv(t, e, flags)
    287      1.1  cgd 	BTREE *t;
    288      1.1  cgd 	EPG *e;
    289      1.1  cgd 	int flags;
    290      1.1  cgd {
    291      1.1  cgd 	EPGNO *c, delc;
    292      1.1  cgd 	PAGE *h;
    293      1.1  cgd 	indx_t index;
    294      1.1  cgd 	pgno_t pg;
    295      1.1  cgd 
    296      1.1  cgd 	/* Save the current cursor if going to delete it. */
    297      1.1  cgd 	c = &t->bt_bcursor;
    298      1.1  cgd 	if (ISSET(t, B_DELCRSR))
    299      1.1  cgd 		delc = *c;
    300      1.1  cgd 
    301      1.1  cgd 	if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
    302      1.1  cgd 		return (RET_ERROR);
    303      1.1  cgd 
    304      1.1  cgd 	/*
    305      1.1  cgd  	 * Find the next/previous record in the tree and point the cursor at it.
    306      1.1  cgd 	 * The cursor may not be moved until a new key has been found.
    307      1.1  cgd 	 */
    308      1.1  cgd 	index = c->index;
    309      1.1  cgd 	switch(flags) {
    310      1.1  cgd 	case R_NEXT:			/* Next record. */
    311      1.1  cgd 		if (++index == NEXTINDEX(h)) {
    312      1.1  cgd 			do {
    313      1.1  cgd 				pg = h->nextpg;
    314      1.1  cgd 				mpool_put(t->bt_mp, h, 0);
    315      1.1  cgd 				if (pg == P_INVALID)
    316      1.1  cgd 					return (RET_SPECIAL);
    317      1.1  cgd 				if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    318      1.1  cgd 					return (RET_ERROR);
    319      1.1  cgd 			} while (NEXTINDEX(h) == 0);
    320      1.1  cgd 			index = 0;
    321      1.1  cgd 		}
    322      1.1  cgd 		break;
    323      1.1  cgd 	case R_PREV:			/* Previous record. */
    324      1.1  cgd 		if (index-- == 0) {
    325      1.1  cgd 			do {
    326      1.1  cgd 				pg = h->prevpg;
    327      1.1  cgd 				mpool_put(t->bt_mp, h, 0);
    328      1.1  cgd 				if (pg == P_INVALID)
    329      1.1  cgd 					return (RET_SPECIAL);
    330      1.1  cgd 				if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    331      1.1  cgd 					return (RET_ERROR);
    332      1.1  cgd 			} while (NEXTINDEX(h) == 0);
    333      1.1  cgd 			index = NEXTINDEX(h) - 1;
    334      1.1  cgd 		}
    335      1.1  cgd 		break;
    336      1.1  cgd 	}
    337      1.1  cgd 
    338      1.1  cgd 	e->page = h;
    339      1.1  cgd 	e->index = index;
    340      1.1  cgd 
    341      1.1  cgd 	/*
    342      1.1  cgd 	 * Delete any already deleted record that we've been saving because the
    343      1.1  cgd 	 * cursor pointed to it.  This could cause the new index to be shifted
    344      1.1  cgd 	 * down by one if the record we're deleting is on the same page and has
    345      1.1  cgd 	 * a larger index.
    346      1.1  cgd 	 */
    347      1.1  cgd 	if (ISSET(t, B_DELCRSR)) {
    348      1.1  cgd 		CLR(t, B_DELCRSR);			/* Don't try twice. */
    349      1.1  cgd 		if (c->pgno == delc.pgno && c->index > delc.index)
    350      1.1  cgd 			--c->index;
    351      1.1  cgd 		if (__bt_crsrdel(t, &delc))
    352      1.1  cgd 			return (RET_ERROR);
    353      1.1  cgd 	}
    354      1.1  cgd 	return (RET_SUCCESS);
    355      1.1  cgd }
    356      1.1  cgd 
    357      1.1  cgd /*
    358      1.1  cgd  * __BT_CRSRDEL -- Delete the record referenced by the cursor.
    359      1.1  cgd  *
    360      1.1  cgd  * Parameters:
    361      1.1  cgd  *	t:	tree
    362      1.1  cgd  *
    363      1.1  cgd  * Returns:
    364      1.1  cgd  *	RET_ERROR, RET_SUCCESS
    365      1.1  cgd  */
    366      1.1  cgd int
    367      1.1  cgd __bt_crsrdel(t, c)
    368      1.1  cgd 	BTREE *t;
    369      1.1  cgd 	EPGNO *c;
    370      1.1  cgd {
    371      1.1  cgd 	PAGE *h;
    372      1.1  cgd 	int status;
    373      1.1  cgd 
    374      1.1  cgd 	CLR(t, B_DELCRSR);			/* Don't try twice. */
    375      1.1  cgd 	if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
    376      1.1  cgd 		return (RET_ERROR);
    377      1.1  cgd 	status = __bt_dleaf(t, h, c->index);
    378      1.1  cgd 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    379      1.1  cgd 	return (status);
    380      1.1  cgd }
    381