Home | History | Annotate | Line # | Download | only in btree
bt_delete.c revision 1.7.2.1
      1  1.7.2.1  jtc /*	$NetBSD: bt_delete.c,v 1.7.2.1 1996/09/16 18:39:36 jtc Exp $	*/
      2      1.6  cgd 
      3      1.1  cgd /*-
      4      1.5  cgd  * Copyright (c) 1990, 1993, 1994
      5      1.1  cgd  *	The Regents of the University of California.  All rights reserved.
      6      1.1  cgd  *
      7      1.1  cgd  * This code is derived from software contributed to Berkeley by
      8      1.1  cgd  * Mike Olson.
      9      1.1  cgd  *
     10      1.1  cgd  * Redistribution and use in source and binary forms, with or without
     11      1.1  cgd  * modification, are permitted provided that the following conditions
     12      1.1  cgd  * are met:
     13      1.1  cgd  * 1. Redistributions of source code must retain the above copyright
     14      1.1  cgd  *    notice, this list of conditions and the following disclaimer.
     15      1.1  cgd  * 2. Redistributions in binary form must reproduce the above copyright
     16      1.1  cgd  *    notice, this list of conditions and the following disclaimer in the
     17      1.1  cgd  *    documentation and/or other materials provided with the distribution.
     18      1.1  cgd  * 3. All advertising materials mentioning features or use of this software
     19      1.1  cgd  *    must display the following acknowledgement:
     20      1.1  cgd  *	This product includes software developed by the University of
     21      1.1  cgd  *	California, Berkeley and its contributors.
     22      1.1  cgd  * 4. Neither the name of the University nor the names of its contributors
     23      1.1  cgd  *    may be used to endorse or promote products derived from this software
     24      1.1  cgd  *    without specific prior written permission.
     25      1.1  cgd  *
     26      1.1  cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     27      1.1  cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28      1.1  cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29      1.1  cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     30      1.1  cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     31      1.1  cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     32      1.1  cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     33      1.1  cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     34      1.1  cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     35      1.1  cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     36      1.1  cgd  * SUCH DAMAGE.
     37      1.1  cgd  */
     38      1.1  cgd 
     39      1.1  cgd #if defined(LIBC_SCCS) && !defined(lint)
     40      1.6  cgd #if 0
     41      1.7  cgd static char sccsid[] = "@(#)bt_delete.c	8.13 (Berkeley) 7/28/94";
     42      1.6  cgd #else
     43  1.7.2.1  jtc static char rcsid[] = "$NetBSD: bt_delete.c,v 1.7.2.1 1996/09/16 18:39:36 jtc Exp $";
     44      1.6  cgd #endif
     45      1.1  cgd #endif /* LIBC_SCCS and not lint */
     46      1.1  cgd 
     47  1.7.2.1  jtc #include "namespace.h"
     48      1.1  cgd #include <sys/types.h>
     49      1.1  cgd 
     50      1.1  cgd #include <errno.h>
     51      1.1  cgd #include <stdio.h>
     52      1.1  cgd #include <string.h>
     53      1.1  cgd 
     54      1.1  cgd #include <db.h>
     55      1.1  cgd #include "btree.h"
     56      1.1  cgd 
     57      1.7  cgd static int __bt_bdelete __P((BTREE *, const DBT *));
     58      1.7  cgd static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int));
     59      1.7  cgd static int __bt_pdelete __P((BTREE *, PAGE *));
     60      1.7  cgd static int __bt_relink __P((BTREE *, PAGE *));
     61      1.7  cgd static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *));
     62      1.1  cgd 
     63      1.1  cgd /*
     64      1.7  cgd  * __bt_delete
     65      1.7  cgd  *	Delete the item(s) referenced by a key.
     66      1.1  cgd  *
     67      1.7  cgd  * Return RET_SPECIAL if the key is not found.
     68      1.1  cgd  */
     69      1.1  cgd int
     70      1.1  cgd __bt_delete(dbp, key, flags)
     71      1.1  cgd 	const DB *dbp;
     72      1.1  cgd 	const DBT *key;
     73      1.1  cgd 	u_int flags;
     74      1.1  cgd {
     75      1.1  cgd 	BTREE *t;
     76      1.7  cgd 	CURSOR *c;
     77      1.7  cgd 	PAGE *h;
     78      1.1  cgd 	int status;
     79      1.1  cgd 
     80      1.1  cgd 	t = dbp->internal;
     81      1.4  cgd 
     82      1.4  cgd 	/* Toss any page pinned across calls. */
     83      1.4  cgd 	if (t->bt_pinned != NULL) {
     84      1.4  cgd 		mpool_put(t->bt_mp, t->bt_pinned, 0);
     85      1.4  cgd 		t->bt_pinned = NULL;
     86      1.4  cgd 	}
     87      1.4  cgd 
     88      1.7  cgd 	/* Check for change to a read-only tree. */
     89      1.7  cgd 	if (F_ISSET(t, B_RDONLY)) {
     90      1.1  cgd 		errno = EPERM;
     91      1.1  cgd 		return (RET_ERROR);
     92      1.1  cgd 	}
     93      1.4  cgd 
     94      1.7  cgd 	switch (flags) {
     95      1.1  cgd 	case 0:
     96      1.7  cgd 		status = __bt_bdelete(t, key);
     97      1.1  cgd 		break;
     98      1.1  cgd 	case R_CURSOR:
     99      1.1  cgd 		/*
    100      1.7  cgd 		 * If flags is R_CURSOR, delete the cursor.  Must already
    101      1.7  cgd 		 * have started a scan and not have already deleted it.
    102      1.1  cgd 		 */
    103      1.7  cgd 		c = &t->bt_cursor;
    104      1.7  cgd 		if (F_ISSET(c, CURS_INIT)) {
    105      1.7  cgd 			if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
    106      1.7  cgd 				return (RET_SPECIAL);
    107      1.7  cgd 			if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
    108      1.7  cgd 				return (RET_ERROR);
    109      1.7  cgd 
    110      1.7  cgd 			/*
    111      1.7  cgd 			 * If the page is about to be emptied, we'll need to
    112      1.7  cgd 			 * delete it, which means we have to acquire a stack.
    113      1.7  cgd 			 */
    114      1.7  cgd 			if (NEXTINDEX(h) == 1)
    115      1.7  cgd 				if (__bt_stkacq(t, &h, &t->bt_cursor))
    116      1.7  cgd 					return (RET_ERROR);
    117      1.7  cgd 
    118      1.7  cgd 			status = __bt_dleaf(t, NULL, h, c->pg.index);
    119      1.7  cgd 
    120      1.7  cgd 			if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
    121      1.7  cgd 				if (__bt_pdelete(t, h))
    122      1.7  cgd 					return (RET_ERROR);
    123      1.7  cgd 			} else
    124      1.7  cgd 				mpool_put(t->bt_mp,
    125      1.7  cgd 				    h, status == RET_SUCCESS ? MPOOL_DIRTY : 0);
    126      1.7  cgd 			break;
    127      1.7  cgd 		}
    128      1.7  cgd 		/* FALLTHROUGH */
    129      1.1  cgd 	default:
    130      1.7  cgd 		errno = EINVAL;
    131      1.1  cgd 		return (RET_ERROR);
    132      1.1  cgd 	}
    133      1.1  cgd 	if (status == RET_SUCCESS)
    134      1.7  cgd 		F_SET(t, B_MODIFIED);
    135      1.1  cgd 	return (status);
    136      1.1  cgd }
    137      1.1  cgd 
    138      1.1  cgd /*
    139      1.7  cgd  * __bt_stkacq --
    140      1.7  cgd  *	Acquire a stack so we can delete a cursor entry.
    141      1.7  cgd  *
    142      1.7  cgd  * Parameters:
    143      1.7  cgd  *	  t:	tree
    144      1.7  cgd  *	 hp:	pointer to current, pinned PAGE pointer
    145      1.7  cgd  *	  c:	pointer to the cursor
    146      1.7  cgd  *
    147      1.7  cgd  * Returns:
    148      1.7  cgd  *	0 on success, 1 on failure
    149      1.7  cgd  */
    150      1.7  cgd static int
    151      1.7  cgd __bt_stkacq(t, hp, c)
    152      1.7  cgd 	BTREE *t;
    153      1.7  cgd 	PAGE **hp;
    154      1.7  cgd 	CURSOR *c;
    155      1.7  cgd {
    156      1.7  cgd 	BINTERNAL *bi;
    157      1.7  cgd 	EPG *e;
    158      1.7  cgd 	EPGNO *parent;
    159      1.7  cgd 	PAGE *h;
    160      1.7  cgd 	indx_t index;
    161      1.7  cgd 	pgno_t pgno;
    162      1.7  cgd 	recno_t nextpg, prevpg;
    163      1.7  cgd 	int exact, level;
    164      1.7  cgd 
    165      1.7  cgd 	/*
    166      1.7  cgd 	 * Find the first occurrence of the key in the tree.  Toss the
    167      1.7  cgd 	 * currently locked page so we don't hit an already-locked page.
    168      1.7  cgd 	 */
    169      1.7  cgd 	h = *hp;
    170      1.7  cgd 	mpool_put(t->bt_mp, h, 0);
    171      1.7  cgd 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    172      1.7  cgd 		return (1);
    173      1.7  cgd 	h = e->page;
    174      1.7  cgd 
    175      1.7  cgd 	/* See if we got it in one shot. */
    176      1.7  cgd 	if (h->pgno == c->pg.pgno)
    177      1.7  cgd 		goto ret;
    178      1.7  cgd 
    179      1.7  cgd 	/*
    180      1.7  cgd 	 * Move right, looking for the page.  At each move we have to move
    181      1.7  cgd 	 * up the stack until we don't have to move to the next page.  If
    182      1.7  cgd 	 * we have to change pages at an internal level, we have to fix the
    183      1.7  cgd 	 * stack back up.
    184      1.7  cgd 	 */
    185      1.7  cgd 	while (h->pgno != c->pg.pgno) {
    186      1.7  cgd 		if ((nextpg = h->nextpg) == P_INVALID)
    187      1.7  cgd 			break;
    188      1.7  cgd 		mpool_put(t->bt_mp, h, 0);
    189      1.7  cgd 
    190      1.7  cgd 		/* Move up the stack. */
    191      1.7  cgd 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    192      1.7  cgd 			/* Get the parent page. */
    193      1.7  cgd 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    194      1.7  cgd 				return (1);
    195      1.7  cgd 
    196      1.7  cgd 			/* Move to the next index. */
    197      1.7  cgd 			if (parent->index != NEXTINDEX(h) - 1) {
    198      1.7  cgd 				index = parent->index + 1;
    199      1.7  cgd 				BT_PUSH(t, h->pgno, index);
    200      1.7  cgd 				break;
    201      1.7  cgd 			}
    202      1.7  cgd 			mpool_put(t->bt_mp, h, 0);
    203      1.7  cgd 		}
    204      1.7  cgd 
    205      1.7  cgd 		/* Restore the stack. */
    206      1.7  cgd 		while (level--) {
    207      1.7  cgd 			/* Push the next level down onto the stack. */
    208      1.7  cgd 			bi = GETBINTERNAL(h, index);
    209      1.7  cgd 			pgno = bi->pgno;
    210      1.7  cgd 			BT_PUSH(t, pgno, 0);
    211      1.7  cgd 
    212      1.7  cgd 			/* Lose the currently pinned page. */
    213      1.7  cgd 			mpool_put(t->bt_mp, h, 0);
    214      1.7  cgd 
    215      1.7  cgd 			/* Get the next level down. */
    216      1.7  cgd 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    217      1.7  cgd 				return (1);
    218      1.7  cgd 			index = 0;
    219      1.7  cgd 		}
    220      1.7  cgd 		mpool_put(t->bt_mp, h, 0);
    221      1.7  cgd 		if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
    222      1.7  cgd 			return (1);
    223      1.7  cgd 	}
    224      1.7  cgd 
    225      1.7  cgd 	if (h->pgno == c->pg.pgno)
    226      1.7  cgd 		goto ret;
    227      1.7  cgd 
    228      1.7  cgd 	/* Reacquire the original stack. */
    229      1.7  cgd 	mpool_put(t->bt_mp, h, 0);
    230      1.7  cgd 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    231      1.7  cgd 		return (1);
    232      1.7  cgd 	h = e->page;
    233      1.7  cgd 
    234      1.7  cgd 	/*
    235      1.7  cgd 	 * Move left, looking for the page.  At each move we have to move
    236      1.7  cgd 	 * up the stack until we don't have to change pages to move to the
    237      1.7  cgd 	 * next page.  If we have to change pages at an internal level, we
    238      1.7  cgd 	 * have to fix the stack back up.
    239      1.7  cgd 	 */
    240      1.7  cgd 	while (h->pgno != c->pg.pgno) {
    241      1.7  cgd 		if ((prevpg = h->prevpg) == P_INVALID)
    242      1.7  cgd 			break;
    243      1.7  cgd 		mpool_put(t->bt_mp, h, 0);
    244      1.7  cgd 
    245      1.7  cgd 		/* Move up the stack. */
    246      1.7  cgd 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    247      1.7  cgd 			/* Get the parent page. */
    248      1.7  cgd 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    249      1.7  cgd 				return (1);
    250      1.7  cgd 
    251      1.7  cgd 			/* Move to the next index. */
    252      1.7  cgd 			if (parent->index != 0) {
    253      1.7  cgd 				index = parent->index - 1;
    254      1.7  cgd 				BT_PUSH(t, h->pgno, index);
    255      1.7  cgd 				break;
    256      1.7  cgd 			}
    257      1.7  cgd 			mpool_put(t->bt_mp, h, 0);
    258      1.7  cgd 		}
    259      1.7  cgd 
    260      1.7  cgd 		/* Restore the stack. */
    261      1.7  cgd 		while (level--) {
    262      1.7  cgd 			/* Push the next level down onto the stack. */
    263      1.7  cgd 			bi = GETBINTERNAL(h, index);
    264      1.7  cgd 			pgno = bi->pgno;
    265      1.7  cgd 
    266      1.7  cgd 			/* Lose the currently pinned page. */
    267      1.7  cgd 			mpool_put(t->bt_mp, h, 0);
    268      1.7  cgd 
    269      1.7  cgd 			/* Get the next level down. */
    270      1.7  cgd 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    271      1.7  cgd 				return (1);
    272      1.7  cgd 
    273      1.7  cgd 			index = NEXTINDEX(h) - 1;
    274      1.7  cgd 			BT_PUSH(t, pgno, index);
    275      1.7  cgd 		}
    276      1.7  cgd 		mpool_put(t->bt_mp, h, 0);
    277      1.7  cgd 		if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
    278      1.7  cgd 			return (1);
    279      1.7  cgd 	}
    280      1.7  cgd 
    281      1.7  cgd 
    282      1.7  cgd ret:	mpool_put(t->bt_mp, h, 0);
    283      1.7  cgd 	return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
    284      1.7  cgd }
    285      1.7  cgd 
    286      1.7  cgd /*
    287      1.7  cgd  * __bt_bdelete --
    288      1.7  cgd  *	Delete all key/data pairs matching the specified key.
    289      1.1  cgd  *
    290      1.1  cgd  * Parameters:
    291      1.7  cgd  *	  t:	tree
    292      1.1  cgd  *	key:	key to delete
    293      1.1  cgd  *
    294      1.1  cgd  * Returns:
    295      1.1  cgd  *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
    296      1.1  cgd  */
    297      1.1  cgd static int
    298      1.7  cgd __bt_bdelete(t, key)
    299      1.1  cgd 	BTREE *t;
    300      1.1  cgd 	const DBT *key;
    301      1.1  cgd {
    302      1.7  cgd 	EPG *e;
    303      1.1  cgd 	PAGE *h;
    304      1.7  cgd 	int deleted, exact, redo;
    305      1.7  cgd 
    306      1.7  cgd 	deleted = 0;
    307      1.1  cgd 
    308      1.1  cgd 	/* Find any matching record; __bt_search pins the page. */
    309      1.7  cgd loop:	if ((e = __bt_search(t, key, &exact)) == NULL)
    310      1.7  cgd 		return (deleted ? RET_SUCCESS : RET_ERROR);
    311      1.1  cgd 	if (!exact) {
    312      1.1  cgd 		mpool_put(t->bt_mp, e->page, 0);
    313      1.7  cgd 		return (deleted ? RET_SUCCESS : RET_SPECIAL);
    314      1.1  cgd 	}
    315      1.1  cgd 
    316      1.1  cgd 	/*
    317      1.7  cgd 	 * Delete forward, then delete backward, from the found key.  If
    318      1.7  cgd 	 * there are duplicates and we reach either side of the page, do
    319      1.7  cgd 	 * the key search again, so that we get them all.
    320      1.1  cgd 	 */
    321      1.7  cgd 	redo = 0;
    322      1.7  cgd 	h = e->page;
    323      1.7  cgd 	do {
    324      1.7  cgd 		if (__bt_dleaf(t, key, h, e->index)) {
    325      1.7  cgd 			mpool_put(t->bt_mp, h, 0);
    326      1.7  cgd 			return (RET_ERROR);
    327      1.7  cgd 		}
    328      1.7  cgd 		if (F_ISSET(t, B_NODUPS)) {
    329      1.7  cgd 			if (NEXTINDEX(h) == 0) {
    330      1.7  cgd 				if (__bt_pdelete(t, h))
    331      1.1  cgd 					return (RET_ERROR);
    332      1.7  cgd 			} else
    333      1.7  cgd 				mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    334      1.7  cgd 			return (RET_SUCCESS);
    335      1.7  cgd 		}
    336      1.7  cgd 		deleted = 1;
    337      1.7  cgd 	} while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
    338      1.7  cgd 
    339      1.7  cgd 	/* Check for right-hand edge of the page. */
    340      1.7  cgd 	if (e->index == NEXTINDEX(h))
    341      1.7  cgd 		redo = 1;
    342      1.1  cgd 
    343      1.7  cgd 	/* Delete from the key to the beginning of the page. */
    344      1.7  cgd 	while (e->index-- > 0) {
    345      1.7  cgd 		if (__bt_cmp(t, key, e) != 0)
    346      1.1  cgd 			break;
    347      1.7  cgd 		if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) {
    348      1.7  cgd 			mpool_put(t->bt_mp, h, 0);
    349      1.7  cgd 			return (RET_ERROR);
    350      1.1  cgd 		}
    351      1.7  cgd 		if (e->index == 0)
    352      1.7  cgd 			redo = 1;
    353      1.7  cgd 	}
    354      1.1  cgd 
    355      1.7  cgd 	/* Check for an empty page. */
    356      1.7  cgd 	if (NEXTINDEX(h) == 0) {
    357      1.7  cgd 		if (__bt_pdelete(t, h))
    358      1.7  cgd 			return (RET_ERROR);
    359      1.7  cgd 		goto loop;
    360      1.1  cgd 	}
    361      1.1  cgd 
    362      1.7  cgd 	/* Put the page. */
    363      1.7  cgd 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    364      1.7  cgd 
    365      1.7  cgd 	if (redo)
    366      1.7  cgd 		goto loop;
    367      1.7  cgd 	return (RET_SUCCESS);
    368      1.7  cgd }
    369      1.7  cgd 
    370      1.7  cgd /*
    371      1.7  cgd  * __bt_pdelete --
    372      1.7  cgd  *	Delete a single page from the tree.
    373      1.7  cgd  *
    374      1.7  cgd  * Parameters:
    375      1.7  cgd  *	t:	tree
    376      1.7  cgd  *	h:	leaf page
    377      1.7  cgd  *
    378      1.7  cgd  * Returns:
    379      1.7  cgd  *	RET_SUCCESS, RET_ERROR.
    380      1.7  cgd  *
    381      1.7  cgd  * Side-effects:
    382      1.7  cgd  *	mpool_put's the page
    383      1.7  cgd  */
    384      1.7  cgd static int
    385      1.7  cgd __bt_pdelete(t, h)
    386      1.7  cgd 	BTREE *t;
    387      1.7  cgd 	PAGE *h;
    388      1.7  cgd {
    389      1.7  cgd 	BINTERNAL *bi;
    390      1.7  cgd 	PAGE *pg;
    391      1.7  cgd 	EPGNO *parent;
    392      1.7  cgd 	indx_t cnt, index, *ip, offset;
    393      1.7  cgd 	u_int32_t nksize;
    394      1.7  cgd 	char *from;
    395      1.7  cgd 
    396      1.1  cgd 	/*
    397      1.7  cgd 	 * Walk the parent page stack -- a LIFO stack of the pages that were
    398      1.7  cgd 	 * traversed when we searched for the page where the delete occurred.
    399      1.7  cgd 	 * Each stack entry is a page number and a page index offset.  The
    400      1.7  cgd 	 * offset is for the page traversed on the search.  We've just deleted
    401      1.7  cgd 	 * a page, so we have to delete the key from the parent page.
    402      1.7  cgd 	 *
    403      1.7  cgd 	 * If the delete from the parent page makes it empty, this process may
    404      1.7  cgd 	 * continue all the way up the tree.  We stop if we reach the root page
    405      1.7  cgd 	 * (which is never deleted, it's just not worth the effort) or if the
    406      1.7  cgd 	 * delete does not empty the page.
    407      1.1  cgd 	 */
    408      1.7  cgd 	while ((parent = BT_POP(t)) != NULL) {
    409      1.7  cgd 		/* Get the parent page. */
    410      1.7  cgd 		if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    411      1.7  cgd 			return (RET_ERROR);
    412      1.7  cgd 
    413      1.7  cgd 		index = parent->index;
    414      1.7  cgd 		bi = GETBINTERNAL(pg, index);
    415      1.7  cgd 
    416      1.7  cgd 		/* Free any overflow pages. */
    417      1.7  cgd 		if (bi->flags & P_BIGKEY &&
    418      1.7  cgd 		    __ovfl_delete(t, bi->bytes) == RET_ERROR) {
    419      1.7  cgd 			mpool_put(t->bt_mp, pg, 0);
    420      1.7  cgd 			return (RET_ERROR);
    421      1.7  cgd 		}
    422      1.1  cgd 
    423      1.7  cgd 		/*
    424      1.7  cgd 		 * Free the parent if it has only the one key and it's not the
    425      1.7  cgd 		 * root page. If it's the rootpage, turn it back into an empty
    426      1.7  cgd 		 * leaf page.
    427      1.7  cgd 		 */
    428      1.7  cgd 		if (NEXTINDEX(pg) == 1)
    429      1.7  cgd 			if (pg->pgno == P_ROOT) {
    430      1.7  cgd 				pg->lower = BTDATAOFF;
    431      1.7  cgd 				pg->upper = t->bt_psize;
    432      1.7  cgd 				pg->flags = P_BLEAF;
    433      1.1  cgd 			} else {
    434      1.7  cgd 				if (__bt_relink(t, pg) || __bt_free(t, pg))
    435      1.1  cgd 					return (RET_ERROR);
    436      1.7  cgd 				continue;
    437      1.1  cgd 			}
    438      1.7  cgd 		else {
    439      1.7  cgd 			/* Pack remaining key items at the end of the page. */
    440      1.7  cgd 			nksize = NBINTERNAL(bi->ksize);
    441      1.7  cgd 			from = (char *)pg + pg->upper;
    442      1.7  cgd 			memmove(from + nksize, from, (char *)bi - from);
    443      1.7  cgd 			pg->upper += nksize;
    444      1.7  cgd 
    445      1.7  cgd 			/* Adjust indices' offsets, shift the indices down. */
    446      1.7  cgd 			offset = pg->linp[index];
    447      1.7  cgd 			for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip)
    448      1.7  cgd 				if (ip[0] < offset)
    449      1.7  cgd 					ip[0] += nksize;
    450      1.7  cgd 			for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip)
    451      1.7  cgd 				ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
    452      1.7  cgd 			pg->lower -= sizeof(indx_t);
    453      1.1  cgd 		}
    454      1.1  cgd 
    455      1.7  cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    456      1.7  cgd 		break;
    457      1.1  cgd 	}
    458      1.1  cgd 
    459      1.7  cgd 	/* Free the leaf page, as long as it wasn't the root. */
    460      1.7  cgd 	if (h->pgno == P_ROOT) {
    461      1.7  cgd 		mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    462      1.7  cgd 		return (RET_SUCCESS);
    463      1.7  cgd 	}
    464      1.7  cgd 	return (__bt_relink(t, h) || __bt_free(t, h));
    465      1.1  cgd }
    466      1.1  cgd 
    467      1.1  cgd /*
    468      1.7  cgd  * __bt_dleaf --
    469      1.7  cgd  *	Delete a single record from a leaf page.
    470      1.1  cgd  *
    471      1.1  cgd  * Parameters:
    472      1.1  cgd  *	t:	tree
    473      1.7  cgd  *    key:	referenced key
    474      1.7  cgd  *	h:	page
    475      1.7  cgd  *	index:	index on page to delete
    476      1.1  cgd  *
    477      1.1  cgd  * Returns:
    478      1.1  cgd  *	RET_SUCCESS, RET_ERROR.
    479      1.1  cgd  */
    480      1.1  cgd int
    481      1.7  cgd __bt_dleaf(t, key, h, index)
    482      1.1  cgd 	BTREE *t;
    483      1.7  cgd 	const DBT *key;
    484      1.1  cgd 	PAGE *h;
    485      1.7  cgd 	u_int index;
    486      1.1  cgd {
    487      1.7  cgd 	BLEAF *bl;
    488      1.7  cgd 	indx_t cnt, *ip, offset;
    489      1.7  cgd 	u_int32_t nbytes;
    490      1.7  cgd 	void *to;
    491      1.1  cgd 	char *from;
    492      1.1  cgd 
    493      1.7  cgd 	/* If this record is referenced by the cursor, delete the cursor. */
    494      1.7  cgd 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    495      1.7  cgd 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    496      1.7  cgd 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index &&
    497      1.7  cgd 	    __bt_curdel(t, key, h, index))
    498      1.7  cgd 		return (RET_ERROR);
    499      1.7  cgd 
    500      1.7  cgd 	/* If the entry uses overflow pages, make them available for reuse. */
    501      1.1  cgd 	to = bl = GETBLEAF(h, index);
    502      1.1  cgd 	if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
    503      1.1  cgd 		return (RET_ERROR);
    504      1.1  cgd 	if (bl->flags & P_BIGDATA &&
    505      1.1  cgd 	    __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
    506      1.1  cgd 		return (RET_ERROR);
    507      1.7  cgd 
    508      1.7  cgd 	/* Pack the remaining key/data items at the end of the page. */
    509      1.1  cgd 	nbytes = NBLEAF(bl);
    510      1.1  cgd 	from = (char *)h + h->upper;
    511      1.1  cgd 	memmove(from + nbytes, from, (char *)to - from);
    512      1.1  cgd 	h->upper += nbytes;
    513      1.1  cgd 
    514      1.7  cgd 	/* Adjust the indices' offsets, shift the indices down. */
    515      1.1  cgd 	offset = h->linp[index];
    516      1.1  cgd 	for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
    517      1.1  cgd 		if (ip[0] < offset)
    518      1.1  cgd 			ip[0] += nbytes;
    519      1.1  cgd 	for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
    520      1.1  cgd 		ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
    521      1.1  cgd 	h->lower -= sizeof(indx_t);
    522      1.7  cgd 
    523      1.7  cgd 	/* If the cursor is on this page, adjust it as necessary. */
    524      1.7  cgd 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    525      1.7  cgd 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    526      1.7  cgd 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index)
    527      1.7  cgd 		--t->bt_cursor.pg.index;
    528      1.7  cgd 
    529      1.1  cgd 	return (RET_SUCCESS);
    530      1.7  cgd }
    531      1.7  cgd 
    532      1.7  cgd /*
    533      1.7  cgd  * __bt_curdel --
    534      1.7  cgd  *	Delete the cursor.
    535      1.7  cgd  *
    536      1.7  cgd  * Parameters:
    537      1.7  cgd  *	t:	tree
    538      1.7  cgd  *    key:	referenced key (or NULL)
    539      1.7  cgd  *	h:	page
    540      1.7  cgd  *  index:	index on page to delete
    541      1.7  cgd  *
    542      1.7  cgd  * Returns:
    543      1.7  cgd  *	RET_SUCCESS, RET_ERROR.
    544      1.7  cgd  */
    545      1.7  cgd static int
    546      1.7  cgd __bt_curdel(t, key, h, index)
    547      1.7  cgd 	BTREE *t;
    548      1.7  cgd 	const DBT *key;
    549      1.7  cgd 	PAGE *h;
    550      1.7  cgd 	u_int index;
    551      1.7  cgd {
    552      1.7  cgd 	CURSOR *c;
    553      1.7  cgd 	EPG e;
    554      1.7  cgd 	PAGE *pg;
    555      1.7  cgd 	int curcopy, status;
    556      1.7  cgd 
    557      1.7  cgd 	/*
    558      1.7  cgd 	 * If there are duplicates, move forward or backward to one.
    559      1.7  cgd 	 * Otherwise, copy the key into the cursor area.
    560      1.7  cgd 	 */
    561      1.7  cgd 	c = &t->bt_cursor;
    562      1.7  cgd 	F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
    563      1.7  cgd 
    564      1.7  cgd 	curcopy = 0;
    565      1.7  cgd 	if (!F_ISSET(t, B_NODUPS)) {
    566      1.7  cgd 		/*
    567      1.7  cgd 		 * We're going to have to do comparisons.  If we weren't
    568      1.7  cgd 		 * provided a copy of the key, i.e. the user is deleting
    569      1.7  cgd 		 * the current cursor position, get one.
    570      1.7  cgd 		 */
    571      1.7  cgd 		if (key == NULL) {
    572      1.7  cgd 			e.page = h;
    573      1.7  cgd 			e.index = index;
    574      1.7  cgd 			if ((status = __bt_ret(t, &e,
    575      1.7  cgd 			    &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
    576      1.7  cgd 				return (status);
    577      1.7  cgd 			curcopy = 1;
    578      1.7  cgd 			key = &c->key;
    579      1.7  cgd 		}
    580      1.7  cgd 		/* Check previous key, if not at the beginning of the page. */
    581      1.7  cgd 		if (index > 0) {
    582      1.7  cgd 			e.page = h;
    583      1.7  cgd 			e.index = index - 1;
    584      1.7  cgd 			if (__bt_cmp(t, key, &e) == 0) {
    585      1.7  cgd 				F_SET(c, CURS_BEFORE);
    586      1.7  cgd 				goto dup2;
    587      1.7  cgd 			}
    588      1.7  cgd 		}
    589      1.7  cgd 		/* Check next key, if not at the end of the page. */
    590      1.7  cgd 		if (index < NEXTINDEX(h) - 1) {
    591      1.7  cgd 			e.page = h;
    592      1.7  cgd 			e.index = index + 1;
    593      1.7  cgd 			if (__bt_cmp(t, key, &e) == 0) {
    594      1.7  cgd 				F_SET(c, CURS_AFTER);
    595      1.7  cgd 				goto dup2;
    596      1.7  cgd 			}
    597      1.7  cgd 		}
    598      1.7  cgd 		/* Check previous key if at the beginning of the page. */
    599      1.7  cgd 		if (index == 0 && h->prevpg != P_INVALID) {
    600      1.7  cgd 			if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    601      1.7  cgd 				return (RET_ERROR);
    602      1.7  cgd 			e.page = pg;
    603      1.7  cgd 			e.index = NEXTINDEX(pg) - 1;
    604      1.7  cgd 			if (__bt_cmp(t, key, &e) == 0) {
    605      1.7  cgd 				F_SET(c, CURS_BEFORE);
    606      1.7  cgd 				goto dup1;
    607      1.7  cgd 			}
    608      1.7  cgd 			mpool_put(t->bt_mp, pg, 0);
    609      1.7  cgd 		}
    610      1.7  cgd 		/* Check next key if at the end of the page. */
    611      1.7  cgd 		if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
    612      1.7  cgd 			if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    613      1.7  cgd 				return (RET_ERROR);
    614      1.7  cgd 			e.page = pg;
    615      1.7  cgd 			e.index = 0;
    616      1.7  cgd 			if (__bt_cmp(t, key, &e) == 0) {
    617      1.7  cgd 				F_SET(c, CURS_AFTER);
    618      1.7  cgd dup1:				mpool_put(t->bt_mp, pg, 0);
    619      1.7  cgd dup2:				c->pg.pgno = e.page->pgno;
    620      1.7  cgd 				c->pg.index = e.index;
    621      1.7  cgd 				return (RET_SUCCESS);
    622      1.7  cgd 			}
    623      1.7  cgd 			mpool_put(t->bt_mp, pg, 0);
    624      1.7  cgd 		}
    625      1.7  cgd 	}
    626      1.7  cgd 	e.page = h;
    627      1.7  cgd 	e.index = index;
    628      1.7  cgd 	if (curcopy || (status =
    629      1.7  cgd 	    __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
    630      1.7  cgd 		F_SET(c, CURS_ACQUIRE);
    631      1.7  cgd 		return (RET_SUCCESS);
    632      1.7  cgd 	}
    633      1.7  cgd 	return (status);
    634      1.7  cgd }
    635      1.7  cgd 
    636      1.7  cgd /*
    637      1.7  cgd  * __bt_relink --
    638      1.7  cgd  *	Link around a deleted page.
    639      1.7  cgd  *
    640      1.7  cgd  * Parameters:
    641      1.7  cgd  *	t:	tree
    642      1.7  cgd  *	h:	page to be deleted
    643      1.7  cgd  */
    644      1.7  cgd static int
    645      1.7  cgd __bt_relink(t, h)
    646      1.7  cgd 	BTREE *t;
    647      1.7  cgd 	PAGE *h;
    648      1.7  cgd {
    649      1.7  cgd 	PAGE *pg;
    650      1.7  cgd 
    651      1.7  cgd 	if (h->nextpg != P_INVALID) {
    652      1.7  cgd 		if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    653      1.7  cgd 			return (RET_ERROR);
    654      1.7  cgd 		pg->prevpg = h->prevpg;
    655      1.7  cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    656      1.7  cgd 	}
    657      1.7  cgd 	if (h->prevpg != P_INVALID) {
    658      1.7  cgd 		if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    659      1.7  cgd 			return (RET_ERROR);
    660      1.7  cgd 		pg->nextpg = h->nextpg;
    661      1.7  cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    662      1.7  cgd 	}
    663      1.7  cgd 	return (0);
    664      1.1  cgd }
    665