Home | History | Annotate | Line # | Download | only in btree
bt_delete.c revision 1.9
      1  1.9       jtc /*	$NetBSD: bt_delete.c,v 1.9 1997/07/21 14:06:31 jtc Exp $	*/
      2  1.6       cgd 
      3  1.1       cgd /*-
      4  1.5       cgd  * Copyright (c) 1990, 1993, 1994
      5  1.1       cgd  *	The Regents of the University of California.  All rights reserved.
      6  1.1       cgd  *
      7  1.1       cgd  * This code is derived from software contributed to Berkeley by
      8  1.1       cgd  * Mike Olson.
      9  1.1       cgd  *
     10  1.1       cgd  * Redistribution and use in source and binary forms, with or without
     11  1.1       cgd  * modification, are permitted provided that the following conditions
     12  1.1       cgd  * are met:
     13  1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     14  1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     15  1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     17  1.1       cgd  *    documentation and/or other materials provided with the distribution.
     18  1.1       cgd  * 3. All advertising materials mentioning features or use of this software
     19  1.1       cgd  *    must display the following acknowledgement:
     20  1.1       cgd  *	This product includes software developed by the University of
     21  1.1       cgd  *	California, Berkeley and its contributors.
     22  1.1       cgd  * 4. Neither the name of the University nor the names of its contributors
     23  1.1       cgd  *    may be used to endorse or promote products derived from this software
     24  1.1       cgd  *    without specific prior written permission.
     25  1.1       cgd  *
     26  1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     27  1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28  1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29  1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     30  1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     31  1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     32  1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     33  1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     34  1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     35  1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     36  1.1       cgd  * SUCH DAMAGE.
     37  1.1       cgd  */
     38  1.1       cgd 
     39  1.8  christos #include <sys/cdefs.h>
     40  1.1       cgd #if defined(LIBC_SCCS) && !defined(lint)
     41  1.6       cgd #if 0
     42  1.7       cgd static char sccsid[] = "@(#)bt_delete.c	8.13 (Berkeley) 7/28/94";
     43  1.6       cgd #else
     44  1.9       jtc __RCSID("$NetBSD: bt_delete.c,v 1.9 1997/07/21 14:06:31 jtc Exp $");
     45  1.6       cgd #endif
     46  1.1       cgd #endif /* LIBC_SCCS and not lint */
     47  1.1       cgd 
     48  1.9       jtc #include "namespace.h"
     49  1.1       cgd #include <sys/types.h>
     50  1.1       cgd 
     51  1.1       cgd #include <errno.h>
     52  1.1       cgd #include <stdio.h>
     53  1.1       cgd #include <string.h>
     54  1.1       cgd 
     55  1.1       cgd #include <db.h>
     56  1.1       cgd #include "btree.h"
     57  1.1       cgd 
     58  1.7       cgd static int __bt_bdelete __P((BTREE *, const DBT *));
     59  1.7       cgd static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int));
     60  1.7       cgd static int __bt_pdelete __P((BTREE *, PAGE *));
     61  1.7       cgd static int __bt_relink __P((BTREE *, PAGE *));
     62  1.7       cgd static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *));
     63  1.1       cgd 
     64  1.1       cgd /*
     65  1.7       cgd  * __bt_delete
     66  1.7       cgd  *	Delete the item(s) referenced by a key.
     67  1.1       cgd  *
     68  1.7       cgd  * Return RET_SPECIAL if the key is not found.
     69  1.1       cgd  */
     70  1.1       cgd int
     71  1.1       cgd __bt_delete(dbp, key, flags)
     72  1.1       cgd 	const DB *dbp;
     73  1.1       cgd 	const DBT *key;
     74  1.1       cgd 	u_int flags;
     75  1.1       cgd {
     76  1.1       cgd 	BTREE *t;
     77  1.7       cgd 	CURSOR *c;
     78  1.7       cgd 	PAGE *h;
     79  1.1       cgd 	int status;
     80  1.1       cgd 
     81  1.1       cgd 	t = dbp->internal;
     82  1.4       cgd 
     83  1.4       cgd 	/* Toss any page pinned across calls. */
     84  1.4       cgd 	if (t->bt_pinned != NULL) {
     85  1.4       cgd 		mpool_put(t->bt_mp, t->bt_pinned, 0);
     86  1.4       cgd 		t->bt_pinned = NULL;
     87  1.4       cgd 	}
     88  1.4       cgd 
     89  1.7       cgd 	/* Check for change to a read-only tree. */
     90  1.7       cgd 	if (F_ISSET(t, B_RDONLY)) {
     91  1.1       cgd 		errno = EPERM;
     92  1.1       cgd 		return (RET_ERROR);
     93  1.1       cgd 	}
     94  1.4       cgd 
     95  1.7       cgd 	switch (flags) {
     96  1.1       cgd 	case 0:
     97  1.7       cgd 		status = __bt_bdelete(t, key);
     98  1.1       cgd 		break;
     99  1.1       cgd 	case R_CURSOR:
    100  1.1       cgd 		/*
    101  1.7       cgd 		 * If flags is R_CURSOR, delete the cursor.  Must already
    102  1.7       cgd 		 * have started a scan and not have already deleted it.
    103  1.1       cgd 		 */
    104  1.7       cgd 		c = &t->bt_cursor;
    105  1.7       cgd 		if (F_ISSET(c, CURS_INIT)) {
    106  1.7       cgd 			if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
    107  1.7       cgd 				return (RET_SPECIAL);
    108  1.7       cgd 			if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
    109  1.7       cgd 				return (RET_ERROR);
    110  1.7       cgd 
    111  1.7       cgd 			/*
    112  1.7       cgd 			 * If the page is about to be emptied, we'll need to
    113  1.7       cgd 			 * delete it, which means we have to acquire a stack.
    114  1.7       cgd 			 */
    115  1.7       cgd 			if (NEXTINDEX(h) == 1)
    116  1.7       cgd 				if (__bt_stkacq(t, &h, &t->bt_cursor))
    117  1.7       cgd 					return (RET_ERROR);
    118  1.7       cgd 
    119  1.7       cgd 			status = __bt_dleaf(t, NULL, h, c->pg.index);
    120  1.7       cgd 
    121  1.7       cgd 			if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
    122  1.7       cgd 				if (__bt_pdelete(t, h))
    123  1.7       cgd 					return (RET_ERROR);
    124  1.7       cgd 			} else
    125  1.7       cgd 				mpool_put(t->bt_mp,
    126  1.7       cgd 				    h, status == RET_SUCCESS ? MPOOL_DIRTY : 0);
    127  1.7       cgd 			break;
    128  1.7       cgd 		}
    129  1.7       cgd 		/* FALLTHROUGH */
    130  1.1       cgd 	default:
    131  1.7       cgd 		errno = EINVAL;
    132  1.1       cgd 		return (RET_ERROR);
    133  1.1       cgd 	}
    134  1.1       cgd 	if (status == RET_SUCCESS)
    135  1.7       cgd 		F_SET(t, B_MODIFIED);
    136  1.1       cgd 	return (status);
    137  1.1       cgd }
    138  1.1       cgd 
    139  1.1       cgd /*
    140  1.7       cgd  * __bt_stkacq --
    141  1.7       cgd  *	Acquire a stack so we can delete a cursor entry.
    142  1.7       cgd  *
    143  1.7       cgd  * Parameters:
    144  1.7       cgd  *	  t:	tree
    145  1.7       cgd  *	 hp:	pointer to current, pinned PAGE pointer
    146  1.7       cgd  *	  c:	pointer to the cursor
    147  1.7       cgd  *
    148  1.7       cgd  * Returns:
    149  1.7       cgd  *	0 on success, 1 on failure
    150  1.7       cgd  */
    151  1.7       cgd static int
    152  1.7       cgd __bt_stkacq(t, hp, c)
    153  1.7       cgd 	BTREE *t;
    154  1.7       cgd 	PAGE **hp;
    155  1.7       cgd 	CURSOR *c;
    156  1.7       cgd {
    157  1.7       cgd 	BINTERNAL *bi;
    158  1.7       cgd 	EPG *e;
    159  1.7       cgd 	EPGNO *parent;
    160  1.7       cgd 	PAGE *h;
    161  1.8  christos 	indx_t index = 0;	/* Pacify gcc */
    162  1.7       cgd 	pgno_t pgno;
    163  1.7       cgd 	recno_t nextpg, prevpg;
    164  1.7       cgd 	int exact, level;
    165  1.7       cgd 
    166  1.7       cgd 	/*
    167  1.7       cgd 	 * Find the first occurrence of the key in the tree.  Toss the
    168  1.7       cgd 	 * currently locked page so we don't hit an already-locked page.
    169  1.7       cgd 	 */
    170  1.7       cgd 	h = *hp;
    171  1.7       cgd 	mpool_put(t->bt_mp, h, 0);
    172  1.7       cgd 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    173  1.7       cgd 		return (1);
    174  1.7       cgd 	h = e->page;
    175  1.7       cgd 
    176  1.7       cgd 	/* See if we got it in one shot. */
    177  1.7       cgd 	if (h->pgno == c->pg.pgno)
    178  1.7       cgd 		goto ret;
    179  1.7       cgd 
    180  1.7       cgd 	/*
    181  1.7       cgd 	 * Move right, looking for the page.  At each move we have to move
    182  1.7       cgd 	 * up the stack until we don't have to move to the next page.  If
    183  1.7       cgd 	 * we have to change pages at an internal level, we have to fix the
    184  1.7       cgd 	 * stack back up.
    185  1.7       cgd 	 */
    186  1.7       cgd 	while (h->pgno != c->pg.pgno) {
    187  1.7       cgd 		if ((nextpg = h->nextpg) == P_INVALID)
    188  1.7       cgd 			break;
    189  1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    190  1.7       cgd 
    191  1.7       cgd 		/* Move up the stack. */
    192  1.7       cgd 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    193  1.7       cgd 			/* Get the parent page. */
    194  1.7       cgd 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    195  1.7       cgd 				return (1);
    196  1.7       cgd 
    197  1.7       cgd 			/* Move to the next index. */
    198  1.7       cgd 			if (parent->index != NEXTINDEX(h) - 1) {
    199  1.7       cgd 				index = parent->index + 1;
    200  1.7       cgd 				BT_PUSH(t, h->pgno, index);
    201  1.7       cgd 				break;
    202  1.7       cgd 			}
    203  1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    204  1.7       cgd 		}
    205  1.7       cgd 
    206  1.7       cgd 		/* Restore the stack. */
    207  1.7       cgd 		while (level--) {
    208  1.7       cgd 			/* Push the next level down onto the stack. */
    209  1.7       cgd 			bi = GETBINTERNAL(h, index);
    210  1.7       cgd 			pgno = bi->pgno;
    211  1.7       cgd 			BT_PUSH(t, pgno, 0);
    212  1.7       cgd 
    213  1.7       cgd 			/* Lose the currently pinned page. */
    214  1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    215  1.7       cgd 
    216  1.7       cgd 			/* Get the next level down. */
    217  1.7       cgd 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    218  1.7       cgd 				return (1);
    219  1.7       cgd 			index = 0;
    220  1.7       cgd 		}
    221  1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    222  1.7       cgd 		if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
    223  1.7       cgd 			return (1);
    224  1.7       cgd 	}
    225  1.7       cgd 
    226  1.7       cgd 	if (h->pgno == c->pg.pgno)
    227  1.7       cgd 		goto ret;
    228  1.7       cgd 
    229  1.7       cgd 	/* Reacquire the original stack. */
    230  1.7       cgd 	mpool_put(t->bt_mp, h, 0);
    231  1.7       cgd 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    232  1.7       cgd 		return (1);
    233  1.7       cgd 	h = e->page;
    234  1.7       cgd 
    235  1.7       cgd 	/*
    236  1.7       cgd 	 * Move left, looking for the page.  At each move we have to move
    237  1.7       cgd 	 * up the stack until we don't have to change pages to move to the
    238  1.7       cgd 	 * next page.  If we have to change pages at an internal level, we
    239  1.7       cgd 	 * have to fix the stack back up.
    240  1.7       cgd 	 */
    241  1.7       cgd 	while (h->pgno != c->pg.pgno) {
    242  1.7       cgd 		if ((prevpg = h->prevpg) == P_INVALID)
    243  1.7       cgd 			break;
    244  1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    245  1.7       cgd 
    246  1.7       cgd 		/* Move up the stack. */
    247  1.7       cgd 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    248  1.7       cgd 			/* Get the parent page. */
    249  1.7       cgd 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    250  1.7       cgd 				return (1);
    251  1.7       cgd 
    252  1.7       cgd 			/* Move to the next index. */
    253  1.7       cgd 			if (parent->index != 0) {
    254  1.7       cgd 				index = parent->index - 1;
    255  1.7       cgd 				BT_PUSH(t, h->pgno, index);
    256  1.7       cgd 				break;
    257  1.7       cgd 			}
    258  1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    259  1.7       cgd 		}
    260  1.7       cgd 
    261  1.7       cgd 		/* Restore the stack. */
    262  1.7       cgd 		while (level--) {
    263  1.7       cgd 			/* Push the next level down onto the stack. */
    264  1.7       cgd 			bi = GETBINTERNAL(h, index);
    265  1.7       cgd 			pgno = bi->pgno;
    266  1.7       cgd 
    267  1.7       cgd 			/* Lose the currently pinned page. */
    268  1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    269  1.7       cgd 
    270  1.7       cgd 			/* Get the next level down. */
    271  1.7       cgd 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    272  1.7       cgd 				return (1);
    273  1.7       cgd 
    274  1.7       cgd 			index = NEXTINDEX(h) - 1;
    275  1.7       cgd 			BT_PUSH(t, pgno, index);
    276  1.7       cgd 		}
    277  1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    278  1.7       cgd 		if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
    279  1.7       cgd 			return (1);
    280  1.7       cgd 	}
    281  1.7       cgd 
    282  1.7       cgd 
    283  1.7       cgd ret:	mpool_put(t->bt_mp, h, 0);
    284  1.7       cgd 	return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
    285  1.7       cgd }
    286  1.7       cgd 
    287  1.7       cgd /*
    288  1.7       cgd  * __bt_bdelete --
    289  1.7       cgd  *	Delete all key/data pairs matching the specified key.
    290  1.1       cgd  *
    291  1.1       cgd  * Parameters:
    292  1.7       cgd  *	  t:	tree
    293  1.1       cgd  *	key:	key to delete
    294  1.1       cgd  *
    295  1.1       cgd  * Returns:
    296  1.1       cgd  *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
    297  1.1       cgd  */
    298  1.1       cgd static int
    299  1.7       cgd __bt_bdelete(t, key)
    300  1.1       cgd 	BTREE *t;
    301  1.1       cgd 	const DBT *key;
    302  1.1       cgd {
    303  1.7       cgd 	EPG *e;
    304  1.1       cgd 	PAGE *h;
    305  1.7       cgd 	int deleted, exact, redo;
    306  1.7       cgd 
    307  1.7       cgd 	deleted = 0;
    308  1.1       cgd 
    309  1.1       cgd 	/* Find any matching record; __bt_search pins the page. */
    310  1.7       cgd loop:	if ((e = __bt_search(t, key, &exact)) == NULL)
    311  1.7       cgd 		return (deleted ? RET_SUCCESS : RET_ERROR);
    312  1.1       cgd 	if (!exact) {
    313  1.1       cgd 		mpool_put(t->bt_mp, e->page, 0);
    314  1.7       cgd 		return (deleted ? RET_SUCCESS : RET_SPECIAL);
    315  1.1       cgd 	}
    316  1.1       cgd 
    317  1.1       cgd 	/*
    318  1.7       cgd 	 * Delete forward, then delete backward, from the found key.  If
    319  1.7       cgd 	 * there are duplicates and we reach either side of the page, do
    320  1.7       cgd 	 * the key search again, so that we get them all.
    321  1.1       cgd 	 */
    322  1.7       cgd 	redo = 0;
    323  1.7       cgd 	h = e->page;
    324  1.7       cgd 	do {
    325  1.7       cgd 		if (__bt_dleaf(t, key, h, e->index)) {
    326  1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    327  1.7       cgd 			return (RET_ERROR);
    328  1.7       cgd 		}
    329  1.7       cgd 		if (F_ISSET(t, B_NODUPS)) {
    330  1.7       cgd 			if (NEXTINDEX(h) == 0) {
    331  1.7       cgd 				if (__bt_pdelete(t, h))
    332  1.1       cgd 					return (RET_ERROR);
    333  1.7       cgd 			} else
    334  1.7       cgd 				mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    335  1.7       cgd 			return (RET_SUCCESS);
    336  1.7       cgd 		}
    337  1.7       cgd 		deleted = 1;
    338  1.7       cgd 	} while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
    339  1.7       cgd 
    340  1.7       cgd 	/* Check for right-hand edge of the page. */
    341  1.7       cgd 	if (e->index == NEXTINDEX(h))
    342  1.7       cgd 		redo = 1;
    343  1.1       cgd 
    344  1.7       cgd 	/* Delete from the key to the beginning of the page. */
    345  1.7       cgd 	while (e->index-- > 0) {
    346  1.7       cgd 		if (__bt_cmp(t, key, e) != 0)
    347  1.1       cgd 			break;
    348  1.7       cgd 		if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) {
    349  1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    350  1.7       cgd 			return (RET_ERROR);
    351  1.1       cgd 		}
    352  1.7       cgd 		if (e->index == 0)
    353  1.7       cgd 			redo = 1;
    354  1.7       cgd 	}
    355  1.1       cgd 
    356  1.7       cgd 	/* Check for an empty page. */
    357  1.7       cgd 	if (NEXTINDEX(h) == 0) {
    358  1.7       cgd 		if (__bt_pdelete(t, h))
    359  1.7       cgd 			return (RET_ERROR);
    360  1.7       cgd 		goto loop;
    361  1.1       cgd 	}
    362  1.1       cgd 
    363  1.7       cgd 	/* Put the page. */
    364  1.7       cgd 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    365  1.7       cgd 
    366  1.7       cgd 	if (redo)
    367  1.7       cgd 		goto loop;
    368  1.7       cgd 	return (RET_SUCCESS);
    369  1.7       cgd }
    370  1.7       cgd 
    371  1.7       cgd /*
    372  1.7       cgd  * __bt_pdelete --
    373  1.7       cgd  *	Delete a single page from the tree.
    374  1.7       cgd  *
    375  1.7       cgd  * Parameters:
    376  1.7       cgd  *	t:	tree
    377  1.7       cgd  *	h:	leaf page
    378  1.7       cgd  *
    379  1.7       cgd  * Returns:
    380  1.7       cgd  *	RET_SUCCESS, RET_ERROR.
    381  1.7       cgd  *
    382  1.7       cgd  * Side-effects:
    383  1.7       cgd  *	mpool_put's the page
    384  1.7       cgd  */
    385  1.7       cgd static int
    386  1.7       cgd __bt_pdelete(t, h)
    387  1.7       cgd 	BTREE *t;
    388  1.7       cgd 	PAGE *h;
    389  1.7       cgd {
    390  1.7       cgd 	BINTERNAL *bi;
    391  1.7       cgd 	PAGE *pg;
    392  1.7       cgd 	EPGNO *parent;
    393  1.7       cgd 	indx_t cnt, index, *ip, offset;
    394  1.7       cgd 	u_int32_t nksize;
    395  1.7       cgd 	char *from;
    396  1.7       cgd 
    397  1.1       cgd 	/*
    398  1.7       cgd 	 * Walk the parent page stack -- a LIFO stack of the pages that were
    399  1.7       cgd 	 * traversed when we searched for the page where the delete occurred.
    400  1.7       cgd 	 * Each stack entry is a page number and a page index offset.  The
    401  1.7       cgd 	 * offset is for the page traversed on the search.  We've just deleted
    402  1.7       cgd 	 * a page, so we have to delete the key from the parent page.
    403  1.7       cgd 	 *
    404  1.7       cgd 	 * If the delete from the parent page makes it empty, this process may
    405  1.7       cgd 	 * continue all the way up the tree.  We stop if we reach the root page
    406  1.7       cgd 	 * (which is never deleted, it's just not worth the effort) or if the
    407  1.7       cgd 	 * delete does not empty the page.
    408  1.1       cgd 	 */
    409  1.7       cgd 	while ((parent = BT_POP(t)) != NULL) {
    410  1.7       cgd 		/* Get the parent page. */
    411  1.7       cgd 		if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    412  1.7       cgd 			return (RET_ERROR);
    413  1.7       cgd 
    414  1.7       cgd 		index = parent->index;
    415  1.7       cgd 		bi = GETBINTERNAL(pg, index);
    416  1.7       cgd 
    417  1.7       cgd 		/* Free any overflow pages. */
    418  1.7       cgd 		if (bi->flags & P_BIGKEY &&
    419  1.7       cgd 		    __ovfl_delete(t, bi->bytes) == RET_ERROR) {
    420  1.7       cgd 			mpool_put(t->bt_mp, pg, 0);
    421  1.7       cgd 			return (RET_ERROR);
    422  1.7       cgd 		}
    423  1.1       cgd 
    424  1.7       cgd 		/*
    425  1.7       cgd 		 * Free the parent if it has only the one key and it's not the
    426  1.7       cgd 		 * root page. If it's the rootpage, turn it back into an empty
    427  1.7       cgd 		 * leaf page.
    428  1.7       cgd 		 */
    429  1.7       cgd 		if (NEXTINDEX(pg) == 1)
    430  1.7       cgd 			if (pg->pgno == P_ROOT) {
    431  1.7       cgd 				pg->lower = BTDATAOFF;
    432  1.7       cgd 				pg->upper = t->bt_psize;
    433  1.7       cgd 				pg->flags = P_BLEAF;
    434  1.1       cgd 			} else {
    435  1.7       cgd 				if (__bt_relink(t, pg) || __bt_free(t, pg))
    436  1.1       cgd 					return (RET_ERROR);
    437  1.7       cgd 				continue;
    438  1.1       cgd 			}
    439  1.7       cgd 		else {
    440  1.7       cgd 			/* Pack remaining key items at the end of the page. */
    441  1.7       cgd 			nksize = NBINTERNAL(bi->ksize);
    442  1.7       cgd 			from = (char *)pg + pg->upper;
    443  1.7       cgd 			memmove(from + nksize, from, (char *)bi - from);
    444  1.7       cgd 			pg->upper += nksize;
    445  1.7       cgd 
    446  1.7       cgd 			/* Adjust indices' offsets, shift the indices down. */
    447  1.7       cgd 			offset = pg->linp[index];
    448  1.7       cgd 			for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip)
    449  1.7       cgd 				if (ip[0] < offset)
    450  1.7       cgd 					ip[0] += nksize;
    451  1.7       cgd 			for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip)
    452  1.7       cgd 				ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
    453  1.7       cgd 			pg->lower -= sizeof(indx_t);
    454  1.1       cgd 		}
    455  1.1       cgd 
    456  1.7       cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    457  1.7       cgd 		break;
    458  1.1       cgd 	}
    459  1.1       cgd 
    460  1.7       cgd 	/* Free the leaf page, as long as it wasn't the root. */
    461  1.7       cgd 	if (h->pgno == P_ROOT) {
    462  1.7       cgd 		mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    463  1.7       cgd 		return (RET_SUCCESS);
    464  1.7       cgd 	}
    465  1.7       cgd 	return (__bt_relink(t, h) || __bt_free(t, h));
    466  1.1       cgd }
    467  1.1       cgd 
    468  1.1       cgd /*
    469  1.7       cgd  * __bt_dleaf --
    470  1.7       cgd  *	Delete a single record from a leaf page.
    471  1.1       cgd  *
    472  1.1       cgd  * Parameters:
    473  1.1       cgd  *	t:	tree
    474  1.7       cgd  *    key:	referenced key
    475  1.7       cgd  *	h:	page
    476  1.7       cgd  *	index:	index on page to delete
    477  1.1       cgd  *
    478  1.1       cgd  * Returns:
    479  1.1       cgd  *	RET_SUCCESS, RET_ERROR.
    480  1.1       cgd  */
    481  1.1       cgd int
    482  1.7       cgd __bt_dleaf(t, key, h, index)
    483  1.1       cgd 	BTREE *t;
    484  1.7       cgd 	const DBT *key;
    485  1.1       cgd 	PAGE *h;
    486  1.7       cgd 	u_int index;
    487  1.1       cgd {
    488  1.7       cgd 	BLEAF *bl;
    489  1.7       cgd 	indx_t cnt, *ip, offset;
    490  1.7       cgd 	u_int32_t nbytes;
    491  1.7       cgd 	void *to;
    492  1.1       cgd 	char *from;
    493  1.1       cgd 
    494  1.7       cgd 	/* If this record is referenced by the cursor, delete the cursor. */
    495  1.7       cgd 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    496  1.7       cgd 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    497  1.7       cgd 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index &&
    498  1.7       cgd 	    __bt_curdel(t, key, h, index))
    499  1.7       cgd 		return (RET_ERROR);
    500  1.7       cgd 
    501  1.7       cgd 	/* If the entry uses overflow pages, make them available for reuse. */
    502  1.1       cgd 	to = bl = GETBLEAF(h, index);
    503  1.1       cgd 	if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
    504  1.1       cgd 		return (RET_ERROR);
    505  1.1       cgd 	if (bl->flags & P_BIGDATA &&
    506  1.1       cgd 	    __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
    507  1.1       cgd 		return (RET_ERROR);
    508  1.7       cgd 
    509  1.7       cgd 	/* Pack the remaining key/data items at the end of the page. */
    510  1.1       cgd 	nbytes = NBLEAF(bl);
    511  1.1       cgd 	from = (char *)h + h->upper;
    512  1.1       cgd 	memmove(from + nbytes, from, (char *)to - from);
    513  1.1       cgd 	h->upper += nbytes;
    514  1.1       cgd 
    515  1.7       cgd 	/* Adjust the indices' offsets, shift the indices down. */
    516  1.1       cgd 	offset = h->linp[index];
    517  1.1       cgd 	for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
    518  1.1       cgd 		if (ip[0] < offset)
    519  1.1       cgd 			ip[0] += nbytes;
    520  1.1       cgd 	for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
    521  1.1       cgd 		ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
    522  1.1       cgd 	h->lower -= sizeof(indx_t);
    523  1.7       cgd 
    524  1.7       cgd 	/* If the cursor is on this page, adjust it as necessary. */
    525  1.7       cgd 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    526  1.7       cgd 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    527  1.7       cgd 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index)
    528  1.7       cgd 		--t->bt_cursor.pg.index;
    529  1.7       cgd 
    530  1.1       cgd 	return (RET_SUCCESS);
    531  1.7       cgd }
    532  1.7       cgd 
    533  1.7       cgd /*
    534  1.7       cgd  * __bt_curdel --
    535  1.7       cgd  *	Delete the cursor.
    536  1.7       cgd  *
    537  1.7       cgd  * Parameters:
    538  1.7       cgd  *	t:	tree
    539  1.7       cgd  *    key:	referenced key (or NULL)
    540  1.7       cgd  *	h:	page
    541  1.7       cgd  *  index:	index on page to delete
    542  1.7       cgd  *
    543  1.7       cgd  * Returns:
    544  1.7       cgd  *	RET_SUCCESS, RET_ERROR.
    545  1.7       cgd  */
    546  1.7       cgd static int
    547  1.7       cgd __bt_curdel(t, key, h, index)
    548  1.7       cgd 	BTREE *t;
    549  1.7       cgd 	const DBT *key;
    550  1.7       cgd 	PAGE *h;
    551  1.7       cgd 	u_int index;
    552  1.7       cgd {
    553  1.7       cgd 	CURSOR *c;
    554  1.7       cgd 	EPG e;
    555  1.7       cgd 	PAGE *pg;
    556  1.7       cgd 	int curcopy, status;
    557  1.7       cgd 
    558  1.7       cgd 	/*
    559  1.7       cgd 	 * If there are duplicates, move forward or backward to one.
    560  1.7       cgd 	 * Otherwise, copy the key into the cursor area.
    561  1.7       cgd 	 */
    562  1.7       cgd 	c = &t->bt_cursor;
    563  1.7       cgd 	F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
    564  1.7       cgd 
    565  1.7       cgd 	curcopy = 0;
    566  1.7       cgd 	if (!F_ISSET(t, B_NODUPS)) {
    567  1.7       cgd 		/*
    568  1.7       cgd 		 * We're going to have to do comparisons.  If we weren't
    569  1.7       cgd 		 * provided a copy of the key, i.e. the user is deleting
    570  1.7       cgd 		 * the current cursor position, get one.
    571  1.7       cgd 		 */
    572  1.7       cgd 		if (key == NULL) {
    573  1.7       cgd 			e.page = h;
    574  1.7       cgd 			e.index = index;
    575  1.7       cgd 			if ((status = __bt_ret(t, &e,
    576  1.7       cgd 			    &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
    577  1.7       cgd 				return (status);
    578  1.7       cgd 			curcopy = 1;
    579  1.7       cgd 			key = &c->key;
    580  1.7       cgd 		}
    581  1.7       cgd 		/* Check previous key, if not at the beginning of the page. */
    582  1.7       cgd 		if (index > 0) {
    583  1.7       cgd 			e.page = h;
    584  1.7       cgd 			e.index = index - 1;
    585  1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    586  1.7       cgd 				F_SET(c, CURS_BEFORE);
    587  1.7       cgd 				goto dup2;
    588  1.7       cgd 			}
    589  1.7       cgd 		}
    590  1.7       cgd 		/* Check next key, if not at the end of the page. */
    591  1.7       cgd 		if (index < NEXTINDEX(h) - 1) {
    592  1.7       cgd 			e.page = h;
    593  1.7       cgd 			e.index = index + 1;
    594  1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    595  1.7       cgd 				F_SET(c, CURS_AFTER);
    596  1.7       cgd 				goto dup2;
    597  1.7       cgd 			}
    598  1.7       cgd 		}
    599  1.7       cgd 		/* Check previous key if at the beginning of the page. */
    600  1.7       cgd 		if (index == 0 && h->prevpg != P_INVALID) {
    601  1.7       cgd 			if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    602  1.7       cgd 				return (RET_ERROR);
    603  1.7       cgd 			e.page = pg;
    604  1.7       cgd 			e.index = NEXTINDEX(pg) - 1;
    605  1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    606  1.7       cgd 				F_SET(c, CURS_BEFORE);
    607  1.7       cgd 				goto dup1;
    608  1.7       cgd 			}
    609  1.7       cgd 			mpool_put(t->bt_mp, pg, 0);
    610  1.7       cgd 		}
    611  1.7       cgd 		/* Check next key if at the end of the page. */
    612  1.7       cgd 		if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
    613  1.7       cgd 			if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    614  1.7       cgd 				return (RET_ERROR);
    615  1.7       cgd 			e.page = pg;
    616  1.7       cgd 			e.index = 0;
    617  1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    618  1.7       cgd 				F_SET(c, CURS_AFTER);
    619  1.7       cgd dup1:				mpool_put(t->bt_mp, pg, 0);
    620  1.7       cgd dup2:				c->pg.pgno = e.page->pgno;
    621  1.7       cgd 				c->pg.index = e.index;
    622  1.7       cgd 				return (RET_SUCCESS);
    623  1.7       cgd 			}
    624  1.7       cgd 			mpool_put(t->bt_mp, pg, 0);
    625  1.7       cgd 		}
    626  1.7       cgd 	}
    627  1.7       cgd 	e.page = h;
    628  1.7       cgd 	e.index = index;
    629  1.7       cgd 	if (curcopy || (status =
    630  1.7       cgd 	    __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
    631  1.7       cgd 		F_SET(c, CURS_ACQUIRE);
    632  1.7       cgd 		return (RET_SUCCESS);
    633  1.7       cgd 	}
    634  1.7       cgd 	return (status);
    635  1.7       cgd }
    636  1.7       cgd 
    637  1.7       cgd /*
    638  1.7       cgd  * __bt_relink --
    639  1.7       cgd  *	Link around a deleted page.
    640  1.7       cgd  *
    641  1.7       cgd  * Parameters:
    642  1.7       cgd  *	t:	tree
    643  1.7       cgd  *	h:	page to be deleted
    644  1.7       cgd  */
    645  1.7       cgd static int
    646  1.7       cgd __bt_relink(t, h)
    647  1.7       cgd 	BTREE *t;
    648  1.7       cgd 	PAGE *h;
    649  1.7       cgd {
    650  1.7       cgd 	PAGE *pg;
    651  1.7       cgd 
    652  1.7       cgd 	if (h->nextpg != P_INVALID) {
    653  1.7       cgd 		if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    654  1.7       cgd 			return (RET_ERROR);
    655  1.7       cgd 		pg->prevpg = h->prevpg;
    656  1.7       cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    657  1.7       cgd 	}
    658  1.7       cgd 	if (h->prevpg != P_INVALID) {
    659  1.7       cgd 		if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    660  1.7       cgd 			return (RET_ERROR);
    661  1.7       cgd 		pg->nextpg = h->nextpg;
    662  1.7       cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    663  1.7       cgd 	}
    664  1.7       cgd 	return (0);
    665  1.1       cgd }
    666