Home | History | Annotate | Line # | Download | only in btree
bt_delete.c revision 1.10
      1  1.10  christos /*	$NetBSD: bt_delete.c,v 1.10 1998/12/09 12:42:46 christos Exp $	*/
      2   1.6       cgd 
      3   1.1       cgd /*-
      4   1.5       cgd  * Copyright (c) 1990, 1993, 1994
      5   1.1       cgd  *	The Regents of the University of California.  All rights reserved.
      6   1.1       cgd  *
      7   1.1       cgd  * This code is derived from software contributed to Berkeley by
      8   1.1       cgd  * Mike Olson.
      9   1.1       cgd  *
     10   1.1       cgd  * Redistribution and use in source and binary forms, with or without
     11   1.1       cgd  * modification, are permitted provided that the following conditions
     12   1.1       cgd  * are met:
     13   1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     14   1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     15   1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     16   1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     17   1.1       cgd  *    documentation and/or other materials provided with the distribution.
     18   1.1       cgd  * 3. All advertising materials mentioning features or use of this software
     19   1.1       cgd  *    must display the following acknowledgement:
     20   1.1       cgd  *	This product includes software developed by the University of
     21   1.1       cgd  *	California, Berkeley and its contributors.
     22   1.1       cgd  * 4. Neither the name of the University nor the names of its contributors
     23   1.1       cgd  *    may be used to endorse or promote products derived from this software
     24   1.1       cgd  *    without specific prior written permission.
     25   1.1       cgd  *
     26   1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     27   1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28   1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29   1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     30   1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     31   1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     32   1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     33   1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     34   1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     35   1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     36   1.1       cgd  * SUCH DAMAGE.
     37   1.1       cgd  */
     38   1.1       cgd 
     39   1.8  christos #include <sys/cdefs.h>
     40   1.1       cgd #if defined(LIBC_SCCS) && !defined(lint)
     41   1.6       cgd #if 0
     42   1.7       cgd static char sccsid[] = "@(#)bt_delete.c	8.13 (Berkeley) 7/28/94";
     43   1.6       cgd #else
     44  1.10  christos __RCSID("$NetBSD: bt_delete.c,v 1.10 1998/12/09 12:42:46 christos Exp $");
     45   1.6       cgd #endif
     46   1.1       cgd #endif /* LIBC_SCCS and not lint */
     47   1.1       cgd 
     48   1.9       jtc #include "namespace.h"
     49   1.1       cgd #include <sys/types.h>
     50   1.1       cgd 
     51   1.1       cgd #include <errno.h>
     52   1.1       cgd #include <stdio.h>
     53   1.1       cgd #include <string.h>
     54   1.1       cgd 
     55   1.1       cgd #include <db.h>
     56   1.1       cgd #include "btree.h"
     57   1.1       cgd 
     58   1.7       cgd static int __bt_bdelete __P((BTREE *, const DBT *));
     59   1.7       cgd static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int));
     60   1.7       cgd static int __bt_pdelete __P((BTREE *, PAGE *));
     61   1.7       cgd static int __bt_relink __P((BTREE *, PAGE *));
     62   1.7       cgd static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *));
     63   1.1       cgd 
     64   1.1       cgd /*
     65   1.7       cgd  * __bt_delete
     66   1.7       cgd  *	Delete the item(s) referenced by a key.
     67   1.1       cgd  *
     68   1.7       cgd  * Return RET_SPECIAL if the key is not found.
     69   1.1       cgd  */
     70   1.1       cgd int
     71   1.1       cgd __bt_delete(dbp, key, flags)
     72   1.1       cgd 	const DB *dbp;
     73   1.1       cgd 	const DBT *key;
     74   1.1       cgd 	u_int flags;
     75   1.1       cgd {
     76   1.1       cgd 	BTREE *t;
     77   1.7       cgd 	CURSOR *c;
     78   1.7       cgd 	PAGE *h;
     79   1.1       cgd 	int status;
     80   1.1       cgd 
     81   1.1       cgd 	t = dbp->internal;
     82   1.4       cgd 
     83   1.4       cgd 	/* Toss any page pinned across calls. */
     84   1.4       cgd 	if (t->bt_pinned != NULL) {
     85   1.4       cgd 		mpool_put(t->bt_mp, t->bt_pinned, 0);
     86   1.4       cgd 		t->bt_pinned = NULL;
     87   1.4       cgd 	}
     88   1.4       cgd 
     89   1.7       cgd 	/* Check for change to a read-only tree. */
     90   1.7       cgd 	if (F_ISSET(t, B_RDONLY)) {
     91   1.1       cgd 		errno = EPERM;
     92   1.1       cgd 		return (RET_ERROR);
     93   1.1       cgd 	}
     94   1.4       cgd 
     95   1.7       cgd 	switch (flags) {
     96   1.1       cgd 	case 0:
     97   1.7       cgd 		status = __bt_bdelete(t, key);
     98   1.1       cgd 		break;
     99   1.1       cgd 	case R_CURSOR:
    100   1.1       cgd 		/*
    101   1.7       cgd 		 * If flags is R_CURSOR, delete the cursor.  Must already
    102   1.7       cgd 		 * have started a scan and not have already deleted it.
    103   1.1       cgd 		 */
    104   1.7       cgd 		c = &t->bt_cursor;
    105   1.7       cgd 		if (F_ISSET(c, CURS_INIT)) {
    106   1.7       cgd 			if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
    107   1.7       cgd 				return (RET_SPECIAL);
    108   1.7       cgd 			if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
    109   1.7       cgd 				return (RET_ERROR);
    110   1.7       cgd 
    111   1.7       cgd 			/*
    112   1.7       cgd 			 * If the page is about to be emptied, we'll need to
    113   1.7       cgd 			 * delete it, which means we have to acquire a stack.
    114   1.7       cgd 			 */
    115   1.7       cgd 			if (NEXTINDEX(h) == 1)
    116   1.7       cgd 				if (__bt_stkacq(t, &h, &t->bt_cursor))
    117   1.7       cgd 					return (RET_ERROR);
    118   1.7       cgd 
    119  1.10  christos 			status = __bt_dleaf(t, NULL, h, (u_int)c->pg.index);
    120   1.7       cgd 
    121   1.7       cgd 			if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
    122   1.7       cgd 				if (__bt_pdelete(t, h))
    123   1.7       cgd 					return (RET_ERROR);
    124   1.7       cgd 			} else
    125  1.10  christos 				mpool_put(t->bt_mp, h,
    126  1.10  christos 				    (u_int)(status == RET_SUCCESS ?
    127  1.10  christos 				    MPOOL_DIRTY : 0));
    128   1.7       cgd 			break;
    129   1.7       cgd 		}
    130   1.7       cgd 		/* FALLTHROUGH */
    131   1.1       cgd 	default:
    132   1.7       cgd 		errno = EINVAL;
    133   1.1       cgd 		return (RET_ERROR);
    134   1.1       cgd 	}
    135   1.1       cgd 	if (status == RET_SUCCESS)
    136   1.7       cgd 		F_SET(t, B_MODIFIED);
    137   1.1       cgd 	return (status);
    138   1.1       cgd }
    139   1.1       cgd 
    140   1.1       cgd /*
    141   1.7       cgd  * __bt_stkacq --
    142   1.7       cgd  *	Acquire a stack so we can delete a cursor entry.
    143   1.7       cgd  *
    144   1.7       cgd  * Parameters:
    145   1.7       cgd  *	  t:	tree
    146   1.7       cgd  *	 hp:	pointer to current, pinned PAGE pointer
    147   1.7       cgd  *	  c:	pointer to the cursor
    148   1.7       cgd  *
    149   1.7       cgd  * Returns:
    150   1.7       cgd  *	0 on success, 1 on failure
    151   1.7       cgd  */
    152   1.7       cgd static int
    153   1.7       cgd __bt_stkacq(t, hp, c)
    154   1.7       cgd 	BTREE *t;
    155   1.7       cgd 	PAGE **hp;
    156   1.7       cgd 	CURSOR *c;
    157   1.7       cgd {
    158   1.7       cgd 	BINTERNAL *bi;
    159   1.7       cgd 	EPG *e;
    160   1.7       cgd 	EPGNO *parent;
    161   1.7       cgd 	PAGE *h;
    162  1.10  christos 	indx_t idx = 0;	/* Pacify gcc */
    163   1.7       cgd 	pgno_t pgno;
    164   1.7       cgd 	recno_t nextpg, prevpg;
    165   1.7       cgd 	int exact, level;
    166   1.7       cgd 
    167   1.7       cgd 	/*
    168   1.7       cgd 	 * Find the first occurrence of the key in the tree.  Toss the
    169   1.7       cgd 	 * currently locked page so we don't hit an already-locked page.
    170   1.7       cgd 	 */
    171   1.7       cgd 	h = *hp;
    172   1.7       cgd 	mpool_put(t->bt_mp, h, 0);
    173   1.7       cgd 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    174   1.7       cgd 		return (1);
    175   1.7       cgd 	h = e->page;
    176   1.7       cgd 
    177   1.7       cgd 	/* See if we got it in one shot. */
    178   1.7       cgd 	if (h->pgno == c->pg.pgno)
    179   1.7       cgd 		goto ret;
    180   1.7       cgd 
    181   1.7       cgd 	/*
    182   1.7       cgd 	 * Move right, looking for the page.  At each move we have to move
    183   1.7       cgd 	 * up the stack until we don't have to move to the next page.  If
    184   1.7       cgd 	 * we have to change pages at an internal level, we have to fix the
    185   1.7       cgd 	 * stack back up.
    186   1.7       cgd 	 */
    187   1.7       cgd 	while (h->pgno != c->pg.pgno) {
    188   1.7       cgd 		if ((nextpg = h->nextpg) == P_INVALID)
    189   1.7       cgd 			break;
    190   1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    191   1.7       cgd 
    192   1.7       cgd 		/* Move up the stack. */
    193   1.7       cgd 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    194   1.7       cgd 			/* Get the parent page. */
    195   1.7       cgd 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    196   1.7       cgd 				return (1);
    197   1.7       cgd 
    198   1.7       cgd 			/* Move to the next index. */
    199   1.7       cgd 			if (parent->index != NEXTINDEX(h) - 1) {
    200  1.10  christos 				idx = parent->index + 1;
    201  1.10  christos 				BT_PUSH(t, h->pgno, idx);
    202   1.7       cgd 				break;
    203   1.7       cgd 			}
    204   1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    205   1.7       cgd 		}
    206   1.7       cgd 
    207   1.7       cgd 		/* Restore the stack. */
    208   1.7       cgd 		while (level--) {
    209   1.7       cgd 			/* Push the next level down onto the stack. */
    210  1.10  christos 			bi = GETBINTERNAL(h, idx);
    211   1.7       cgd 			pgno = bi->pgno;
    212   1.7       cgd 			BT_PUSH(t, pgno, 0);
    213   1.7       cgd 
    214   1.7       cgd 			/* Lose the currently pinned page. */
    215   1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    216   1.7       cgd 
    217   1.7       cgd 			/* Get the next level down. */
    218   1.7       cgd 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    219   1.7       cgd 				return (1);
    220  1.10  christos 			idx = 0;
    221   1.7       cgd 		}
    222   1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    223   1.7       cgd 		if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
    224   1.7       cgd 			return (1);
    225   1.7       cgd 	}
    226   1.7       cgd 
    227   1.7       cgd 	if (h->pgno == c->pg.pgno)
    228   1.7       cgd 		goto ret;
    229   1.7       cgd 
    230   1.7       cgd 	/* Reacquire the original stack. */
    231   1.7       cgd 	mpool_put(t->bt_mp, h, 0);
    232   1.7       cgd 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    233   1.7       cgd 		return (1);
    234   1.7       cgd 	h = e->page;
    235   1.7       cgd 
    236   1.7       cgd 	/*
    237   1.7       cgd 	 * Move left, looking for the page.  At each move we have to move
    238   1.7       cgd 	 * up the stack until we don't have to change pages to move to the
    239   1.7       cgd 	 * next page.  If we have to change pages at an internal level, we
    240   1.7       cgd 	 * have to fix the stack back up.
    241   1.7       cgd 	 */
    242   1.7       cgd 	while (h->pgno != c->pg.pgno) {
    243   1.7       cgd 		if ((prevpg = h->prevpg) == P_INVALID)
    244   1.7       cgd 			break;
    245   1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    246   1.7       cgd 
    247   1.7       cgd 		/* Move up the stack. */
    248   1.7       cgd 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    249   1.7       cgd 			/* Get the parent page. */
    250   1.7       cgd 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    251   1.7       cgd 				return (1);
    252   1.7       cgd 
    253   1.7       cgd 			/* Move to the next index. */
    254   1.7       cgd 			if (parent->index != 0) {
    255  1.10  christos 				idx = parent->index - 1;
    256  1.10  christos 				BT_PUSH(t, h->pgno, idx);
    257   1.7       cgd 				break;
    258   1.7       cgd 			}
    259   1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    260   1.7       cgd 		}
    261   1.7       cgd 
    262   1.7       cgd 		/* Restore the stack. */
    263   1.7       cgd 		while (level--) {
    264   1.7       cgd 			/* Push the next level down onto the stack. */
    265  1.10  christos 			bi = GETBINTERNAL(h, idx);
    266   1.7       cgd 			pgno = bi->pgno;
    267   1.7       cgd 
    268   1.7       cgd 			/* Lose the currently pinned page. */
    269   1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    270   1.7       cgd 
    271   1.7       cgd 			/* Get the next level down. */
    272   1.7       cgd 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    273   1.7       cgd 				return (1);
    274   1.7       cgd 
    275  1.10  christos 			idx = NEXTINDEX(h) - 1;
    276  1.10  christos 			BT_PUSH(t, pgno, idx);
    277   1.7       cgd 		}
    278   1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    279   1.7       cgd 		if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
    280   1.7       cgd 			return (1);
    281   1.7       cgd 	}
    282   1.7       cgd 
    283   1.7       cgd 
    284   1.7       cgd ret:	mpool_put(t->bt_mp, h, 0);
    285   1.7       cgd 	return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
    286   1.7       cgd }
    287   1.7       cgd 
    288   1.7       cgd /*
    289   1.7       cgd  * __bt_bdelete --
    290   1.7       cgd  *	Delete all key/data pairs matching the specified key.
    291   1.1       cgd  *
    292   1.1       cgd  * Parameters:
    293   1.7       cgd  *	  t:	tree
    294   1.1       cgd  *	key:	key to delete
    295   1.1       cgd  *
    296   1.1       cgd  * Returns:
    297   1.1       cgd  *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
    298   1.1       cgd  */
    299   1.1       cgd static int
    300   1.7       cgd __bt_bdelete(t, key)
    301   1.1       cgd 	BTREE *t;
    302   1.1       cgd 	const DBT *key;
    303   1.1       cgd {
    304   1.7       cgd 	EPG *e;
    305   1.1       cgd 	PAGE *h;
    306   1.7       cgd 	int deleted, exact, redo;
    307   1.7       cgd 
    308   1.7       cgd 	deleted = 0;
    309   1.1       cgd 
    310   1.1       cgd 	/* Find any matching record; __bt_search pins the page. */
    311   1.7       cgd loop:	if ((e = __bt_search(t, key, &exact)) == NULL)
    312   1.7       cgd 		return (deleted ? RET_SUCCESS : RET_ERROR);
    313   1.1       cgd 	if (!exact) {
    314   1.1       cgd 		mpool_put(t->bt_mp, e->page, 0);
    315   1.7       cgd 		return (deleted ? RET_SUCCESS : RET_SPECIAL);
    316   1.1       cgd 	}
    317   1.1       cgd 
    318   1.1       cgd 	/*
    319   1.7       cgd 	 * Delete forward, then delete backward, from the found key.  If
    320   1.7       cgd 	 * there are duplicates and we reach either side of the page, do
    321   1.7       cgd 	 * the key search again, so that we get them all.
    322   1.1       cgd 	 */
    323   1.7       cgd 	redo = 0;
    324   1.7       cgd 	h = e->page;
    325   1.7       cgd 	do {
    326  1.10  christos 		if (__bt_dleaf(t, key, h, (u_int)e->index)) {
    327   1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    328   1.7       cgd 			return (RET_ERROR);
    329   1.7       cgd 		}
    330   1.7       cgd 		if (F_ISSET(t, B_NODUPS)) {
    331   1.7       cgd 			if (NEXTINDEX(h) == 0) {
    332   1.7       cgd 				if (__bt_pdelete(t, h))
    333   1.1       cgd 					return (RET_ERROR);
    334   1.7       cgd 			} else
    335   1.7       cgd 				mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    336   1.7       cgd 			return (RET_SUCCESS);
    337   1.7       cgd 		}
    338   1.7       cgd 		deleted = 1;
    339   1.7       cgd 	} while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
    340   1.7       cgd 
    341   1.7       cgd 	/* Check for right-hand edge of the page. */
    342   1.7       cgd 	if (e->index == NEXTINDEX(h))
    343   1.7       cgd 		redo = 1;
    344   1.1       cgd 
    345   1.7       cgd 	/* Delete from the key to the beginning of the page. */
    346   1.7       cgd 	while (e->index-- > 0) {
    347   1.7       cgd 		if (__bt_cmp(t, key, e) != 0)
    348   1.1       cgd 			break;
    349  1.10  christos 		if (__bt_dleaf(t, key, h, (u_int)e->index) == RET_ERROR) {
    350   1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    351   1.7       cgd 			return (RET_ERROR);
    352   1.1       cgd 		}
    353   1.7       cgd 		if (e->index == 0)
    354   1.7       cgd 			redo = 1;
    355   1.7       cgd 	}
    356   1.1       cgd 
    357   1.7       cgd 	/* Check for an empty page. */
    358   1.7       cgd 	if (NEXTINDEX(h) == 0) {
    359   1.7       cgd 		if (__bt_pdelete(t, h))
    360   1.7       cgd 			return (RET_ERROR);
    361   1.7       cgd 		goto loop;
    362   1.1       cgd 	}
    363   1.1       cgd 
    364   1.7       cgd 	/* Put the page. */
    365   1.7       cgd 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    366   1.7       cgd 
    367   1.7       cgd 	if (redo)
    368   1.7       cgd 		goto loop;
    369   1.7       cgd 	return (RET_SUCCESS);
    370   1.7       cgd }
    371   1.7       cgd 
    372   1.7       cgd /*
    373   1.7       cgd  * __bt_pdelete --
    374   1.7       cgd  *	Delete a single page from the tree.
    375   1.7       cgd  *
    376   1.7       cgd  * Parameters:
    377   1.7       cgd  *	t:	tree
    378   1.7       cgd  *	h:	leaf page
    379   1.7       cgd  *
    380   1.7       cgd  * Returns:
    381   1.7       cgd  *	RET_SUCCESS, RET_ERROR.
    382   1.7       cgd  *
    383   1.7       cgd  * Side-effects:
    384   1.7       cgd  *	mpool_put's the page
    385   1.7       cgd  */
    386   1.7       cgd static int
    387   1.7       cgd __bt_pdelete(t, h)
    388   1.7       cgd 	BTREE *t;
    389   1.7       cgd 	PAGE *h;
    390   1.7       cgd {
    391   1.7       cgd 	BINTERNAL *bi;
    392   1.7       cgd 	PAGE *pg;
    393   1.7       cgd 	EPGNO *parent;
    394  1.10  christos 	indx_t cnt, idx, *ip, offset;
    395   1.7       cgd 	u_int32_t nksize;
    396   1.7       cgd 	char *from;
    397   1.7       cgd 
    398   1.1       cgd 	/*
    399   1.7       cgd 	 * Walk the parent page stack -- a LIFO stack of the pages that were
    400   1.7       cgd 	 * traversed when we searched for the page where the delete occurred.
    401   1.7       cgd 	 * Each stack entry is a page number and a page index offset.  The
    402   1.7       cgd 	 * offset is for the page traversed on the search.  We've just deleted
    403   1.7       cgd 	 * a page, so we have to delete the key from the parent page.
    404   1.7       cgd 	 *
    405   1.7       cgd 	 * If the delete from the parent page makes it empty, this process may
    406   1.7       cgd 	 * continue all the way up the tree.  We stop if we reach the root page
    407   1.7       cgd 	 * (which is never deleted, it's just not worth the effort) or if the
    408   1.7       cgd 	 * delete does not empty the page.
    409   1.1       cgd 	 */
    410   1.7       cgd 	while ((parent = BT_POP(t)) != NULL) {
    411   1.7       cgd 		/* Get the parent page. */
    412   1.7       cgd 		if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    413   1.7       cgd 			return (RET_ERROR);
    414   1.7       cgd 
    415  1.10  christos 		idx = parent->index;
    416  1.10  christos 		bi = GETBINTERNAL(pg, idx);
    417   1.7       cgd 
    418   1.7       cgd 		/* Free any overflow pages. */
    419   1.7       cgd 		if (bi->flags & P_BIGKEY &&
    420   1.7       cgd 		    __ovfl_delete(t, bi->bytes) == RET_ERROR) {
    421   1.7       cgd 			mpool_put(t->bt_mp, pg, 0);
    422   1.7       cgd 			return (RET_ERROR);
    423   1.7       cgd 		}
    424   1.1       cgd 
    425   1.7       cgd 		/*
    426   1.7       cgd 		 * Free the parent if it has only the one key and it's not the
    427   1.7       cgd 		 * root page. If it's the rootpage, turn it back into an empty
    428   1.7       cgd 		 * leaf page.
    429   1.7       cgd 		 */
    430   1.7       cgd 		if (NEXTINDEX(pg) == 1)
    431   1.7       cgd 			if (pg->pgno == P_ROOT) {
    432   1.7       cgd 				pg->lower = BTDATAOFF;
    433   1.7       cgd 				pg->upper = t->bt_psize;
    434   1.7       cgd 				pg->flags = P_BLEAF;
    435   1.1       cgd 			} else {
    436   1.7       cgd 				if (__bt_relink(t, pg) || __bt_free(t, pg))
    437   1.1       cgd 					return (RET_ERROR);
    438   1.7       cgd 				continue;
    439   1.1       cgd 			}
    440   1.7       cgd 		else {
    441   1.7       cgd 			/* Pack remaining key items at the end of the page. */
    442   1.7       cgd 			nksize = NBINTERNAL(bi->ksize);
    443  1.10  christos 			from = (char *)(void *)pg + pg->upper;
    444  1.10  christos 			memmove(from + nksize, from,
    445  1.10  christos 			(size_t)((char *)(void *)bi - from));
    446   1.7       cgd 			pg->upper += nksize;
    447   1.7       cgd 
    448   1.7       cgd 			/* Adjust indices' offsets, shift the indices down. */
    449  1.10  christos 			offset = pg->linp[idx];
    450  1.10  christos 			for (cnt = idx, ip = &pg->linp[0]; cnt--; ++ip)
    451   1.7       cgd 				if (ip[0] < offset)
    452   1.7       cgd 					ip[0] += nksize;
    453  1.10  christos 			for (cnt = NEXTINDEX(pg) - idx; --cnt; ++ip)
    454   1.7       cgd 				ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
    455   1.7       cgd 			pg->lower -= sizeof(indx_t);
    456   1.1       cgd 		}
    457   1.1       cgd 
    458   1.7       cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    459   1.7       cgd 		break;
    460   1.1       cgd 	}
    461   1.1       cgd 
    462   1.7       cgd 	/* Free the leaf page, as long as it wasn't the root. */
    463   1.7       cgd 	if (h->pgno == P_ROOT) {
    464   1.7       cgd 		mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    465   1.7       cgd 		return (RET_SUCCESS);
    466   1.7       cgd 	}
    467   1.7       cgd 	return (__bt_relink(t, h) || __bt_free(t, h));
    468   1.1       cgd }
    469   1.1       cgd 
    470   1.1       cgd /*
    471   1.7       cgd  * __bt_dleaf --
    472   1.7       cgd  *	Delete a single record from a leaf page.
    473   1.1       cgd  *
    474   1.1       cgd  * Parameters:
    475   1.1       cgd  *	t:	tree
    476   1.7       cgd  *    key:	referenced key
    477   1.7       cgd  *	h:	page
    478  1.10  christos  *	idx:	index on page to delete
    479   1.1       cgd  *
    480   1.1       cgd  * Returns:
    481   1.1       cgd  *	RET_SUCCESS, RET_ERROR.
    482   1.1       cgd  */
    483   1.1       cgd int
    484  1.10  christos __bt_dleaf(t, key, h, idx)
    485   1.1       cgd 	BTREE *t;
    486   1.7       cgd 	const DBT *key;
    487   1.1       cgd 	PAGE *h;
    488  1.10  christos 	u_int idx;
    489   1.1       cgd {
    490   1.7       cgd 	BLEAF *bl;
    491   1.7       cgd 	indx_t cnt, *ip, offset;
    492   1.7       cgd 	u_int32_t nbytes;
    493   1.7       cgd 	void *to;
    494   1.1       cgd 	char *from;
    495   1.1       cgd 
    496   1.7       cgd 	/* If this record is referenced by the cursor, delete the cursor. */
    497   1.7       cgd 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    498   1.7       cgd 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    499  1.10  christos 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == idx &&
    500  1.10  christos 	    __bt_curdel(t, key, h, idx))
    501   1.7       cgd 		return (RET_ERROR);
    502   1.7       cgd 
    503   1.7       cgd 	/* If the entry uses overflow pages, make them available for reuse. */
    504  1.10  christos 	to = bl = GETBLEAF(h, idx);
    505   1.1       cgd 	if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
    506   1.1       cgd 		return (RET_ERROR);
    507   1.1       cgd 	if (bl->flags & P_BIGDATA &&
    508   1.1       cgd 	    __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
    509   1.1       cgd 		return (RET_ERROR);
    510   1.7       cgd 
    511   1.7       cgd 	/* Pack the remaining key/data items at the end of the page. */
    512   1.1       cgd 	nbytes = NBLEAF(bl);
    513  1.10  christos 	from = (char *)(void *)h + h->upper;
    514  1.10  christos 	memmove(from + nbytes, from, (size_t)((char *)(void *)to - from));
    515   1.1       cgd 	h->upper += nbytes;
    516   1.1       cgd 
    517   1.7       cgd 	/* Adjust the indices' offsets, shift the indices down. */
    518  1.10  christos 	offset = h->linp[idx];
    519  1.10  christos 	for (cnt = idx, ip = &h->linp[0]; cnt--; ++ip)
    520   1.1       cgd 		if (ip[0] < offset)
    521   1.1       cgd 			ip[0] += nbytes;
    522  1.10  christos 	for (cnt = NEXTINDEX(h) - idx; --cnt; ++ip)
    523   1.1       cgd 		ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
    524   1.1       cgd 	h->lower -= sizeof(indx_t);
    525   1.7       cgd 
    526   1.7       cgd 	/* If the cursor is on this page, adjust it as necessary. */
    527   1.7       cgd 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    528   1.7       cgd 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    529  1.10  christos 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > idx)
    530   1.7       cgd 		--t->bt_cursor.pg.index;
    531   1.7       cgd 
    532   1.1       cgd 	return (RET_SUCCESS);
    533   1.7       cgd }
    534   1.7       cgd 
    535   1.7       cgd /*
    536   1.7       cgd  * __bt_curdel --
    537   1.7       cgd  *	Delete the cursor.
    538   1.7       cgd  *
    539   1.7       cgd  * Parameters:
    540   1.7       cgd  *	t:	tree
    541   1.7       cgd  *    key:	referenced key (or NULL)
    542   1.7       cgd  *	h:	page
    543  1.10  christos  *  idx:	index on page to delete
    544   1.7       cgd  *
    545   1.7       cgd  * Returns:
    546   1.7       cgd  *	RET_SUCCESS, RET_ERROR.
    547   1.7       cgd  */
    548   1.7       cgd static int
    549  1.10  christos __bt_curdel(t, key, h, idx)
    550   1.7       cgd 	BTREE *t;
    551   1.7       cgd 	const DBT *key;
    552   1.7       cgd 	PAGE *h;
    553  1.10  christos 	u_int idx;
    554   1.7       cgd {
    555   1.7       cgd 	CURSOR *c;
    556   1.7       cgd 	EPG e;
    557   1.7       cgd 	PAGE *pg;
    558   1.7       cgd 	int curcopy, status;
    559   1.7       cgd 
    560   1.7       cgd 	/*
    561   1.7       cgd 	 * If there are duplicates, move forward or backward to one.
    562   1.7       cgd 	 * Otherwise, copy the key into the cursor area.
    563   1.7       cgd 	 */
    564   1.7       cgd 	c = &t->bt_cursor;
    565   1.7       cgd 	F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
    566   1.7       cgd 
    567   1.7       cgd 	curcopy = 0;
    568   1.7       cgd 	if (!F_ISSET(t, B_NODUPS)) {
    569   1.7       cgd 		/*
    570   1.7       cgd 		 * We're going to have to do comparisons.  If we weren't
    571   1.7       cgd 		 * provided a copy of the key, i.e. the user is deleting
    572   1.7       cgd 		 * the current cursor position, get one.
    573   1.7       cgd 		 */
    574   1.7       cgd 		if (key == NULL) {
    575   1.7       cgd 			e.page = h;
    576  1.10  christos 			e.index = idx;
    577   1.7       cgd 			if ((status = __bt_ret(t, &e,
    578   1.7       cgd 			    &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
    579   1.7       cgd 				return (status);
    580   1.7       cgd 			curcopy = 1;
    581   1.7       cgd 			key = &c->key;
    582   1.7       cgd 		}
    583   1.7       cgd 		/* Check previous key, if not at the beginning of the page. */
    584  1.10  christos 		if (idx > 0) {
    585   1.7       cgd 			e.page = h;
    586  1.10  christos 			e.index = idx - 1;
    587   1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    588   1.7       cgd 				F_SET(c, CURS_BEFORE);
    589   1.7       cgd 				goto dup2;
    590   1.7       cgd 			}
    591   1.7       cgd 		}
    592   1.7       cgd 		/* Check next key, if not at the end of the page. */
    593  1.10  christos 		if (idx < NEXTINDEX(h) - 1) {
    594   1.7       cgd 			e.page = h;
    595  1.10  christos 			e.index = idx + 1;
    596   1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    597   1.7       cgd 				F_SET(c, CURS_AFTER);
    598   1.7       cgd 				goto dup2;
    599   1.7       cgd 			}
    600   1.7       cgd 		}
    601   1.7       cgd 		/* Check previous key if at the beginning of the page. */
    602  1.10  christos 		if (idx == 0 && h->prevpg != P_INVALID) {
    603   1.7       cgd 			if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    604   1.7       cgd 				return (RET_ERROR);
    605   1.7       cgd 			e.page = pg;
    606   1.7       cgd 			e.index = NEXTINDEX(pg) - 1;
    607   1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    608   1.7       cgd 				F_SET(c, CURS_BEFORE);
    609   1.7       cgd 				goto dup1;
    610   1.7       cgd 			}
    611   1.7       cgd 			mpool_put(t->bt_mp, pg, 0);
    612   1.7       cgd 		}
    613   1.7       cgd 		/* Check next key if at the end of the page. */
    614  1.10  christos 		if (idx == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
    615   1.7       cgd 			if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    616   1.7       cgd 				return (RET_ERROR);
    617   1.7       cgd 			e.page = pg;
    618   1.7       cgd 			e.index = 0;
    619   1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    620   1.7       cgd 				F_SET(c, CURS_AFTER);
    621   1.7       cgd dup1:				mpool_put(t->bt_mp, pg, 0);
    622   1.7       cgd dup2:				c->pg.pgno = e.page->pgno;
    623   1.7       cgd 				c->pg.index = e.index;
    624   1.7       cgd 				return (RET_SUCCESS);
    625   1.7       cgd 			}
    626   1.7       cgd 			mpool_put(t->bt_mp, pg, 0);
    627   1.7       cgd 		}
    628   1.7       cgd 	}
    629   1.7       cgd 	e.page = h;
    630  1.10  christos 	e.index = idx;
    631   1.7       cgd 	if (curcopy || (status =
    632   1.7       cgd 	    __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
    633   1.7       cgd 		F_SET(c, CURS_ACQUIRE);
    634   1.7       cgd 		return (RET_SUCCESS);
    635   1.7       cgd 	}
    636   1.7       cgd 	return (status);
    637   1.7       cgd }
    638   1.7       cgd 
    639   1.7       cgd /*
    640   1.7       cgd  * __bt_relink --
    641   1.7       cgd  *	Link around a deleted page.
    642   1.7       cgd  *
    643   1.7       cgd  * Parameters:
    644   1.7       cgd  *	t:	tree
    645   1.7       cgd  *	h:	page to be deleted
    646   1.7       cgd  */
    647   1.7       cgd static int
    648   1.7       cgd __bt_relink(t, h)
    649   1.7       cgd 	BTREE *t;
    650   1.7       cgd 	PAGE *h;
    651   1.7       cgd {
    652   1.7       cgd 	PAGE *pg;
    653   1.7       cgd 
    654   1.7       cgd 	if (h->nextpg != P_INVALID) {
    655   1.7       cgd 		if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    656   1.7       cgd 			return (RET_ERROR);
    657   1.7       cgd 		pg->prevpg = h->prevpg;
    658   1.7       cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    659   1.7       cgd 	}
    660   1.7       cgd 	if (h->prevpg != P_INVALID) {
    661   1.7       cgd 		if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    662   1.7       cgd 			return (RET_ERROR);
    663   1.7       cgd 		pg->nextpg = h->nextpg;
    664   1.7       cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    665   1.7       cgd 	}
    666   1.7       cgd 	return (0);
    667   1.1       cgd }
    668