Home | History | Annotate | Line # | Download | only in btree
bt_delete.c revision 1.1.1.2
      1      1.1  cgd /*-
      2  1.1.1.1  cgd  * Copyright (c) 1990, 1993, 1994
      3      1.1  cgd  *	The Regents of the University of California.  All rights reserved.
      4      1.1  cgd  *
      5      1.1  cgd  * This code is derived from software contributed to Berkeley by
      6      1.1  cgd  * Mike Olson.
      7      1.1  cgd  *
      8      1.1  cgd  * Redistribution and use in source and binary forms, with or without
      9      1.1  cgd  * modification, are permitted provided that the following conditions
     10      1.1  cgd  * are met:
     11      1.1  cgd  * 1. Redistributions of source code must retain the above copyright
     12      1.1  cgd  *    notice, this list of conditions and the following disclaimer.
     13      1.1  cgd  * 2. Redistributions in binary form must reproduce the above copyright
     14      1.1  cgd  *    notice, this list of conditions and the following disclaimer in the
     15      1.1  cgd  *    documentation and/or other materials provided with the distribution.
     16      1.1  cgd  * 3. All advertising materials mentioning features or use of this software
     17      1.1  cgd  *    must display the following acknowledgement:
     18      1.1  cgd  *	This product includes software developed by the University of
     19      1.1  cgd  *	California, Berkeley and its contributors.
     20      1.1  cgd  * 4. Neither the name of the University nor the names of its contributors
     21      1.1  cgd  *    may be used to endorse or promote products derived from this software
     22      1.1  cgd  *    without specific prior written permission.
     23      1.1  cgd  *
     24      1.1  cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25      1.1  cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26      1.1  cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27      1.1  cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28      1.1  cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29      1.1  cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30      1.1  cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31      1.1  cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32      1.1  cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33      1.1  cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34      1.1  cgd  * SUCH DAMAGE.
     35      1.1  cgd  */
     36      1.1  cgd 
     37      1.1  cgd #if defined(LIBC_SCCS) && !defined(lint)
     38  1.1.1.2  cgd static char sccsid[] = "@(#)bt_delete.c	8.13 (Berkeley) 7/28/94";
     39      1.1  cgd #endif /* LIBC_SCCS and not lint */
     40      1.1  cgd 
     41      1.1  cgd #include <sys/types.h>
     42      1.1  cgd 
     43      1.1  cgd #include <errno.h>
     44      1.1  cgd #include <stdio.h>
     45      1.1  cgd #include <string.h>
     46      1.1  cgd 
     47      1.1  cgd #include <db.h>
     48      1.1  cgd #include "btree.h"
     49      1.1  cgd 
     50  1.1.1.2  cgd static int __bt_bdelete __P((BTREE *, const DBT *));
     51  1.1.1.2  cgd static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int));
     52  1.1.1.2  cgd static int __bt_pdelete __P((BTREE *, PAGE *));
     53  1.1.1.2  cgd static int __bt_relink __P((BTREE *, PAGE *));
     54  1.1.1.2  cgd static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *));
     55      1.1  cgd 
     56      1.1  cgd /*
     57  1.1.1.2  cgd  * __bt_delete
     58  1.1.1.2  cgd  *	Delete the item(s) referenced by a key.
     59      1.1  cgd  *
     60  1.1.1.2  cgd  * Return RET_SPECIAL if the key is not found.
     61      1.1  cgd  */
     62      1.1  cgd int
     63      1.1  cgd __bt_delete(dbp, key, flags)
     64      1.1  cgd 	const DB *dbp;
     65      1.1  cgd 	const DBT *key;
     66      1.1  cgd 	u_int flags;
     67      1.1  cgd {
     68      1.1  cgd 	BTREE *t;
     69  1.1.1.2  cgd 	CURSOR *c;
     70  1.1.1.2  cgd 	PAGE *h;
     71      1.1  cgd 	int status;
     72      1.1  cgd 
     73      1.1  cgd 	t = dbp->internal;
     74  1.1.1.1  cgd 
     75  1.1.1.1  cgd 	/* Toss any page pinned across calls. */
     76  1.1.1.1  cgd 	if (t->bt_pinned != NULL) {
     77  1.1.1.1  cgd 		mpool_put(t->bt_mp, t->bt_pinned, 0);
     78  1.1.1.1  cgd 		t->bt_pinned = NULL;
     79  1.1.1.1  cgd 	}
     80  1.1.1.1  cgd 
     81  1.1.1.2  cgd 	/* Check for change to a read-only tree. */
     82  1.1.1.2  cgd 	if (F_ISSET(t, B_RDONLY)) {
     83      1.1  cgd 		errno = EPERM;
     84      1.1  cgd 		return (RET_ERROR);
     85      1.1  cgd 	}
     86  1.1.1.1  cgd 
     87  1.1.1.2  cgd 	switch (flags) {
     88      1.1  cgd 	case 0:
     89  1.1.1.2  cgd 		status = __bt_bdelete(t, key);
     90      1.1  cgd 		break;
     91      1.1  cgd 	case R_CURSOR:
     92      1.1  cgd 		/*
     93  1.1.1.2  cgd 		 * If flags is R_CURSOR, delete the cursor.  Must already
     94  1.1.1.2  cgd 		 * have started a scan and not have already deleted it.
     95      1.1  cgd 		 */
     96  1.1.1.2  cgd 		c = &t->bt_cursor;
     97  1.1.1.2  cgd 		if (F_ISSET(c, CURS_INIT)) {
     98  1.1.1.2  cgd 			if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
     99  1.1.1.2  cgd 				return (RET_SPECIAL);
    100  1.1.1.2  cgd 			if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
    101  1.1.1.2  cgd 				return (RET_ERROR);
    102  1.1.1.2  cgd 
    103  1.1.1.2  cgd 			/*
    104  1.1.1.2  cgd 			 * If the page is about to be emptied, we'll need to
    105  1.1.1.2  cgd 			 * delete it, which means we have to acquire a stack.
    106  1.1.1.2  cgd 			 */
    107  1.1.1.2  cgd 			if (NEXTINDEX(h) == 1)
    108  1.1.1.2  cgd 				if (__bt_stkacq(t, &h, &t->bt_cursor))
    109  1.1.1.2  cgd 					return (RET_ERROR);
    110  1.1.1.2  cgd 
    111  1.1.1.2  cgd 			status = __bt_dleaf(t, NULL, h, c->pg.index);
    112  1.1.1.2  cgd 
    113  1.1.1.2  cgd 			if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
    114  1.1.1.2  cgd 				if (__bt_pdelete(t, h))
    115  1.1.1.2  cgd 					return (RET_ERROR);
    116  1.1.1.2  cgd 			} else
    117  1.1.1.2  cgd 				mpool_put(t->bt_mp,
    118  1.1.1.2  cgd 				    h, status == RET_SUCCESS ? MPOOL_DIRTY : 0);
    119  1.1.1.2  cgd 			break;
    120  1.1.1.2  cgd 		}
    121  1.1.1.2  cgd 		/* FALLTHROUGH */
    122      1.1  cgd 	default:
    123  1.1.1.2  cgd 		errno = EINVAL;
    124      1.1  cgd 		return (RET_ERROR);
    125      1.1  cgd 	}
    126      1.1  cgd 	if (status == RET_SUCCESS)
    127  1.1.1.2  cgd 		F_SET(t, B_MODIFIED);
    128      1.1  cgd 	return (status);
    129      1.1  cgd }
    130      1.1  cgd 
    131      1.1  cgd /*
    132  1.1.1.2  cgd  * __bt_stkacq --
    133  1.1.1.2  cgd  *	Acquire a stack so we can delete a cursor entry.
    134  1.1.1.2  cgd  *
    135  1.1.1.2  cgd  * Parameters:
    136  1.1.1.2  cgd  *	  t:	tree
    137  1.1.1.2  cgd  *	 hp:	pointer to current, pinned PAGE pointer
    138  1.1.1.2  cgd  *	  c:	pointer to the cursor
    139  1.1.1.2  cgd  *
    140  1.1.1.2  cgd  * Returns:
    141  1.1.1.2  cgd  *	0 on success, 1 on failure
    142  1.1.1.2  cgd  */
    143  1.1.1.2  cgd static int
    144  1.1.1.2  cgd __bt_stkacq(t, hp, c)
    145  1.1.1.2  cgd 	BTREE *t;
    146  1.1.1.2  cgd 	PAGE **hp;
    147  1.1.1.2  cgd 	CURSOR *c;
    148  1.1.1.2  cgd {
    149  1.1.1.2  cgd 	BINTERNAL *bi;
    150  1.1.1.2  cgd 	EPG *e;
    151  1.1.1.2  cgd 	EPGNO *parent;
    152  1.1.1.2  cgd 	PAGE *h;
    153  1.1.1.2  cgd 	indx_t index;
    154  1.1.1.2  cgd 	pgno_t pgno;
    155  1.1.1.2  cgd 	recno_t nextpg, prevpg;
    156  1.1.1.2  cgd 	int exact, level;
    157  1.1.1.2  cgd 
    158  1.1.1.2  cgd 	/*
    159  1.1.1.2  cgd 	 * Find the first occurrence of the key in the tree.  Toss the
    160  1.1.1.2  cgd 	 * currently locked page so we don't hit an already-locked page.
    161  1.1.1.2  cgd 	 */
    162  1.1.1.2  cgd 	h = *hp;
    163  1.1.1.2  cgd 	mpool_put(t->bt_mp, h, 0);
    164  1.1.1.2  cgd 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    165  1.1.1.2  cgd 		return (1);
    166  1.1.1.2  cgd 	h = e->page;
    167  1.1.1.2  cgd 
    168  1.1.1.2  cgd 	/* See if we got it in one shot. */
    169  1.1.1.2  cgd 	if (h->pgno == c->pg.pgno)
    170  1.1.1.2  cgd 		goto ret;
    171  1.1.1.2  cgd 
    172  1.1.1.2  cgd 	/*
    173  1.1.1.2  cgd 	 * Move right, looking for the page.  At each move we have to move
    174  1.1.1.2  cgd 	 * up the stack until we don't have to move to the next page.  If
    175  1.1.1.2  cgd 	 * we have to change pages at an internal level, we have to fix the
    176  1.1.1.2  cgd 	 * stack back up.
    177  1.1.1.2  cgd 	 */
    178  1.1.1.2  cgd 	while (h->pgno != c->pg.pgno) {
    179  1.1.1.2  cgd 		if ((nextpg = h->nextpg) == P_INVALID)
    180  1.1.1.2  cgd 			break;
    181  1.1.1.2  cgd 		mpool_put(t->bt_mp, h, 0);
    182  1.1.1.2  cgd 
    183  1.1.1.2  cgd 		/* Move up the stack. */
    184  1.1.1.2  cgd 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    185  1.1.1.2  cgd 			/* Get the parent page. */
    186  1.1.1.2  cgd 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    187  1.1.1.2  cgd 				return (1);
    188  1.1.1.2  cgd 
    189  1.1.1.2  cgd 			/* Move to the next index. */
    190  1.1.1.2  cgd 			if (parent->index != NEXTINDEX(h) - 1) {
    191  1.1.1.2  cgd 				index = parent->index + 1;
    192  1.1.1.2  cgd 				BT_PUSH(t, h->pgno, index);
    193  1.1.1.2  cgd 				break;
    194  1.1.1.2  cgd 			}
    195  1.1.1.2  cgd 			mpool_put(t->bt_mp, h, 0);
    196  1.1.1.2  cgd 		}
    197  1.1.1.2  cgd 
    198  1.1.1.2  cgd 		/* Restore the stack. */
    199  1.1.1.2  cgd 		while (level--) {
    200  1.1.1.2  cgd 			/* Push the next level down onto the stack. */
    201  1.1.1.2  cgd 			bi = GETBINTERNAL(h, index);
    202  1.1.1.2  cgd 			pgno = bi->pgno;
    203  1.1.1.2  cgd 			BT_PUSH(t, pgno, 0);
    204  1.1.1.2  cgd 
    205  1.1.1.2  cgd 			/* Lose the currently pinned page. */
    206  1.1.1.2  cgd 			mpool_put(t->bt_mp, h, 0);
    207  1.1.1.2  cgd 
    208  1.1.1.2  cgd 			/* Get the next level down. */
    209  1.1.1.2  cgd 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    210  1.1.1.2  cgd 				return (1);
    211  1.1.1.2  cgd 			index = 0;
    212  1.1.1.2  cgd 		}
    213  1.1.1.2  cgd 		mpool_put(t->bt_mp, h, 0);
    214  1.1.1.2  cgd 		if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
    215  1.1.1.2  cgd 			return (1);
    216  1.1.1.2  cgd 	}
    217  1.1.1.2  cgd 
    218  1.1.1.2  cgd 	if (h->pgno == c->pg.pgno)
    219  1.1.1.2  cgd 		goto ret;
    220  1.1.1.2  cgd 
    221  1.1.1.2  cgd 	/* Reacquire the original stack. */
    222  1.1.1.2  cgd 	mpool_put(t->bt_mp, h, 0);
    223  1.1.1.2  cgd 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    224  1.1.1.2  cgd 		return (1);
    225  1.1.1.2  cgd 	h = e->page;
    226  1.1.1.2  cgd 
    227  1.1.1.2  cgd 	/*
    228  1.1.1.2  cgd 	 * Move left, looking for the page.  At each move we have to move
    229  1.1.1.2  cgd 	 * up the stack until we don't have to change pages to move to the
    230  1.1.1.2  cgd 	 * next page.  If we have to change pages at an internal level, we
    231  1.1.1.2  cgd 	 * have to fix the stack back up.
    232  1.1.1.2  cgd 	 */
    233  1.1.1.2  cgd 	while (h->pgno != c->pg.pgno) {
    234  1.1.1.2  cgd 		if ((prevpg = h->prevpg) == P_INVALID)
    235  1.1.1.2  cgd 			break;
    236  1.1.1.2  cgd 		mpool_put(t->bt_mp, h, 0);
    237  1.1.1.2  cgd 
    238  1.1.1.2  cgd 		/* Move up the stack. */
    239  1.1.1.2  cgd 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    240  1.1.1.2  cgd 			/* Get the parent page. */
    241  1.1.1.2  cgd 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    242  1.1.1.2  cgd 				return (1);
    243  1.1.1.2  cgd 
    244  1.1.1.2  cgd 			/* Move to the next index. */
    245  1.1.1.2  cgd 			if (parent->index != 0) {
    246  1.1.1.2  cgd 				index = parent->index - 1;
    247  1.1.1.2  cgd 				BT_PUSH(t, h->pgno, index);
    248  1.1.1.2  cgd 				break;
    249  1.1.1.2  cgd 			}
    250  1.1.1.2  cgd 			mpool_put(t->bt_mp, h, 0);
    251  1.1.1.2  cgd 		}
    252  1.1.1.2  cgd 
    253  1.1.1.2  cgd 		/* Restore the stack. */
    254  1.1.1.2  cgd 		while (level--) {
    255  1.1.1.2  cgd 			/* Push the next level down onto the stack. */
    256  1.1.1.2  cgd 			bi = GETBINTERNAL(h, index);
    257  1.1.1.2  cgd 			pgno = bi->pgno;
    258  1.1.1.2  cgd 
    259  1.1.1.2  cgd 			/* Lose the currently pinned page. */
    260  1.1.1.2  cgd 			mpool_put(t->bt_mp, h, 0);
    261  1.1.1.2  cgd 
    262  1.1.1.2  cgd 			/* Get the next level down. */
    263  1.1.1.2  cgd 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    264  1.1.1.2  cgd 				return (1);
    265  1.1.1.2  cgd 
    266  1.1.1.2  cgd 			index = NEXTINDEX(h) - 1;
    267  1.1.1.2  cgd 			BT_PUSH(t, pgno, index);
    268  1.1.1.2  cgd 		}
    269  1.1.1.2  cgd 		mpool_put(t->bt_mp, h, 0);
    270  1.1.1.2  cgd 		if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
    271  1.1.1.2  cgd 			return (1);
    272  1.1.1.2  cgd 	}
    273  1.1.1.2  cgd 
    274  1.1.1.2  cgd 
    275  1.1.1.2  cgd ret:	mpool_put(t->bt_mp, h, 0);
    276  1.1.1.2  cgd 	return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
    277  1.1.1.2  cgd }
    278  1.1.1.2  cgd 
    279  1.1.1.2  cgd /*
    280  1.1.1.2  cgd  * __bt_bdelete --
    281  1.1.1.2  cgd  *	Delete all key/data pairs matching the specified key.
    282      1.1  cgd  *
    283      1.1  cgd  * Parameters:
    284  1.1.1.2  cgd  *	  t:	tree
    285      1.1  cgd  *	key:	key to delete
    286      1.1  cgd  *
    287      1.1  cgd  * Returns:
    288      1.1  cgd  *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
    289      1.1  cgd  */
    290      1.1  cgd static int
    291  1.1.1.2  cgd __bt_bdelete(t, key)
    292      1.1  cgd 	BTREE *t;
    293      1.1  cgd 	const DBT *key;
    294      1.1  cgd {
    295  1.1.1.2  cgd 	EPG *e;
    296      1.1  cgd 	PAGE *h;
    297  1.1.1.2  cgd 	int deleted, exact, redo;
    298  1.1.1.2  cgd 
    299  1.1.1.2  cgd 	deleted = 0;
    300      1.1  cgd 
    301      1.1  cgd 	/* Find any matching record; __bt_search pins the page. */
    302  1.1.1.2  cgd loop:	if ((e = __bt_search(t, key, &exact)) == NULL)
    303  1.1.1.2  cgd 		return (deleted ? RET_SUCCESS : RET_ERROR);
    304      1.1  cgd 	if (!exact) {
    305      1.1  cgd 		mpool_put(t->bt_mp, e->page, 0);
    306  1.1.1.2  cgd 		return (deleted ? RET_SUCCESS : RET_SPECIAL);
    307      1.1  cgd 	}
    308      1.1  cgd 
    309      1.1  cgd 	/*
    310  1.1.1.2  cgd 	 * Delete forward, then delete backward, from the found key.  If
    311  1.1.1.2  cgd 	 * there are duplicates and we reach either side of the page, do
    312  1.1.1.2  cgd 	 * the key search again, so that we get them all.
    313      1.1  cgd 	 */
    314  1.1.1.2  cgd 	redo = 0;
    315  1.1.1.2  cgd 	h = e->page;
    316  1.1.1.2  cgd 	do {
    317  1.1.1.2  cgd 		if (__bt_dleaf(t, key, h, e->index)) {
    318  1.1.1.2  cgd 			mpool_put(t->bt_mp, h, 0);
    319  1.1.1.2  cgd 			return (RET_ERROR);
    320  1.1.1.2  cgd 		}
    321  1.1.1.2  cgd 		if (F_ISSET(t, B_NODUPS)) {
    322  1.1.1.2  cgd 			if (NEXTINDEX(h) == 0) {
    323  1.1.1.2  cgd 				if (__bt_pdelete(t, h))
    324      1.1  cgd 					return (RET_ERROR);
    325  1.1.1.2  cgd 			} else
    326  1.1.1.2  cgd 				mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    327  1.1.1.2  cgd 			return (RET_SUCCESS);
    328      1.1  cgd 		}
    329  1.1.1.2  cgd 		deleted = 1;
    330  1.1.1.2  cgd 	} while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
    331      1.1  cgd 
    332  1.1.1.2  cgd 	/* Check for right-hand edge of the page. */
    333  1.1.1.2  cgd 	if (e->index == NEXTINDEX(h))
    334  1.1.1.2  cgd 		redo = 1;
    335  1.1.1.2  cgd 
    336  1.1.1.2  cgd 	/* Delete from the key to the beginning of the page. */
    337  1.1.1.2  cgd 	while (e->index-- > 0) {
    338      1.1  cgd 		if (__bt_cmp(t, key, e) != 0)
    339      1.1  cgd 			break;
    340  1.1.1.2  cgd 		if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) {
    341  1.1.1.2  cgd 			mpool_put(t->bt_mp, h, 0);
    342  1.1.1.2  cgd 			return (RET_ERROR);
    343  1.1.1.2  cgd 		}
    344  1.1.1.2  cgd 		if (e->index == 0)
    345  1.1.1.2  cgd 			redo = 1;
    346      1.1  cgd 	}
    347      1.1  cgd 
    348  1.1.1.2  cgd 	/* Check for an empty page. */
    349  1.1.1.2  cgd 	if (NEXTINDEX(h) == 0) {
    350  1.1.1.2  cgd 		if (__bt_pdelete(t, h))
    351  1.1.1.2  cgd 			return (RET_ERROR);
    352  1.1.1.2  cgd 		goto loop;
    353  1.1.1.2  cgd 	}
    354  1.1.1.2  cgd 
    355  1.1.1.2  cgd 	/* Put the page. */
    356  1.1.1.2  cgd 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    357  1.1.1.2  cgd 
    358  1.1.1.2  cgd 	if (redo)
    359  1.1.1.2  cgd 		goto loop;
    360  1.1.1.2  cgd 	return (RET_SUCCESS);
    361  1.1.1.2  cgd }
    362  1.1.1.2  cgd 
    363  1.1.1.2  cgd /*
    364  1.1.1.2  cgd  * __bt_pdelete --
    365  1.1.1.2  cgd  *	Delete a single page from the tree.
    366  1.1.1.2  cgd  *
    367  1.1.1.2  cgd  * Parameters:
    368  1.1.1.2  cgd  *	t:	tree
    369  1.1.1.2  cgd  *	h:	leaf page
    370  1.1.1.2  cgd  *
    371  1.1.1.2  cgd  * Returns:
    372  1.1.1.2  cgd  *	RET_SUCCESS, RET_ERROR.
    373  1.1.1.2  cgd  *
    374  1.1.1.2  cgd  * Side-effects:
    375  1.1.1.2  cgd  *	mpool_put's the page
    376  1.1.1.2  cgd  */
    377  1.1.1.2  cgd static int
    378  1.1.1.2  cgd __bt_pdelete(t, h)
    379  1.1.1.2  cgd 	BTREE *t;
    380  1.1.1.2  cgd 	PAGE *h;
    381  1.1.1.2  cgd {
    382  1.1.1.2  cgd 	BINTERNAL *bi;
    383  1.1.1.2  cgd 	PAGE *pg;
    384  1.1.1.2  cgd 	EPGNO *parent;
    385  1.1.1.2  cgd 	indx_t cnt, index, *ip, offset;
    386  1.1.1.2  cgd 	u_int32_t nksize;
    387  1.1.1.2  cgd 	char *from;
    388      1.1  cgd 
    389      1.1  cgd 	/*
    390  1.1.1.2  cgd 	 * Walk the parent page stack -- a LIFO stack of the pages that were
    391  1.1.1.2  cgd 	 * traversed when we searched for the page where the delete occurred.
    392  1.1.1.2  cgd 	 * Each stack entry is a page number and a page index offset.  The
    393  1.1.1.2  cgd 	 * offset is for the page traversed on the search.  We've just deleted
    394  1.1.1.2  cgd 	 * a page, so we have to delete the key from the parent page.
    395  1.1.1.2  cgd 	 *
    396  1.1.1.2  cgd 	 * If the delete from the parent page makes it empty, this process may
    397  1.1.1.2  cgd 	 * continue all the way up the tree.  We stop if we reach the root page
    398  1.1.1.2  cgd 	 * (which is never deleted, it's just not worth the effort) or if the
    399  1.1.1.2  cgd 	 * delete does not empty the page.
    400      1.1  cgd 	 */
    401  1.1.1.2  cgd 	while ((parent = BT_POP(t)) != NULL) {
    402  1.1.1.2  cgd 		/* Get the parent page. */
    403  1.1.1.2  cgd 		if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    404  1.1.1.2  cgd 			return (RET_ERROR);
    405  1.1.1.2  cgd 
    406  1.1.1.2  cgd 		index = parent->index;
    407  1.1.1.2  cgd 		bi = GETBINTERNAL(pg, index);
    408  1.1.1.2  cgd 
    409  1.1.1.2  cgd 		/* Free any overflow pages. */
    410  1.1.1.2  cgd 		if (bi->flags & P_BIGKEY &&
    411  1.1.1.2  cgd 		    __ovfl_delete(t, bi->bytes) == RET_ERROR) {
    412  1.1.1.2  cgd 			mpool_put(t->bt_mp, pg, 0);
    413  1.1.1.2  cgd 			return (RET_ERROR);
    414  1.1.1.2  cgd 		}
    415  1.1.1.2  cgd 
    416  1.1.1.2  cgd 		/*
    417  1.1.1.2  cgd 		 * Free the parent if it has only the one key and it's not the
    418  1.1.1.2  cgd 		 * root page. If it's the rootpage, turn it back into an empty
    419  1.1.1.2  cgd 		 * leaf page.
    420  1.1.1.2  cgd 		 */
    421  1.1.1.2  cgd 		if (NEXTINDEX(pg) == 1)
    422  1.1.1.2  cgd 			if (pg->pgno == P_ROOT) {
    423  1.1.1.2  cgd 				pg->lower = BTDATAOFF;
    424  1.1.1.2  cgd 				pg->upper = t->bt_psize;
    425  1.1.1.2  cgd 				pg->flags = P_BLEAF;
    426      1.1  cgd 			} else {
    427  1.1.1.2  cgd 				if (__bt_relink(t, pg) || __bt_free(t, pg))
    428      1.1  cgd 					return (RET_ERROR);
    429  1.1.1.2  cgd 				continue;
    430      1.1  cgd 			}
    431  1.1.1.2  cgd 		else {
    432  1.1.1.2  cgd 			/* Pack remaining key items at the end of the page. */
    433  1.1.1.2  cgd 			nksize = NBINTERNAL(bi->ksize);
    434  1.1.1.2  cgd 			from = (char *)pg + pg->upper;
    435  1.1.1.2  cgd 			memmove(from + nksize, from, (char *)bi - from);
    436  1.1.1.2  cgd 			pg->upper += nksize;
    437  1.1.1.2  cgd 
    438  1.1.1.2  cgd 			/* Adjust indices' offsets, shift the indices down. */
    439  1.1.1.2  cgd 			offset = pg->linp[index];
    440  1.1.1.2  cgd 			for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip)
    441  1.1.1.2  cgd 				if (ip[0] < offset)
    442  1.1.1.2  cgd 					ip[0] += nksize;
    443  1.1.1.2  cgd 			for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip)
    444  1.1.1.2  cgd 				ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
    445  1.1.1.2  cgd 			pg->lower -= sizeof(indx_t);
    446      1.1  cgd 		}
    447      1.1  cgd 
    448  1.1.1.2  cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    449  1.1.1.2  cgd 		break;
    450      1.1  cgd 	}
    451      1.1  cgd 
    452  1.1.1.2  cgd 	/* Free the leaf page, as long as it wasn't the root. */
    453  1.1.1.2  cgd 	if (h->pgno == P_ROOT) {
    454  1.1.1.2  cgd 		mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    455  1.1.1.2  cgd 		return (RET_SUCCESS);
    456  1.1.1.2  cgd 	}
    457  1.1.1.2  cgd 	return (__bt_relink(t, h) || __bt_free(t, h));
    458      1.1  cgd }
    459      1.1  cgd 
    460      1.1  cgd /*
    461  1.1.1.2  cgd  * __bt_dleaf --
    462  1.1.1.2  cgd  *	Delete a single record from a leaf page.
    463      1.1  cgd  *
    464      1.1  cgd  * Parameters:
    465      1.1  cgd  *	t:	tree
    466  1.1.1.2  cgd  *    key:	referenced key
    467  1.1.1.2  cgd  *	h:	page
    468  1.1.1.2  cgd  *	index:	index on page to delete
    469      1.1  cgd  *
    470      1.1  cgd  * Returns:
    471      1.1  cgd  *	RET_SUCCESS, RET_ERROR.
    472      1.1  cgd  */
    473      1.1  cgd int
    474  1.1.1.2  cgd __bt_dleaf(t, key, h, index)
    475      1.1  cgd 	BTREE *t;
    476  1.1.1.2  cgd 	const DBT *key;
    477      1.1  cgd 	PAGE *h;
    478  1.1.1.2  cgd 	u_int index;
    479      1.1  cgd {
    480  1.1.1.2  cgd 	BLEAF *bl;
    481  1.1.1.2  cgd 	indx_t cnt, *ip, offset;
    482  1.1.1.2  cgd 	u_int32_t nbytes;
    483      1.1  cgd 	void *to;
    484  1.1.1.2  cgd 	char *from;
    485      1.1  cgd 
    486  1.1.1.2  cgd 	/* If this record is referenced by the cursor, delete the cursor. */
    487  1.1.1.2  cgd 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    488  1.1.1.2  cgd 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    489  1.1.1.2  cgd 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index &&
    490  1.1.1.2  cgd 	    __bt_curdel(t, key, h, index))
    491  1.1.1.2  cgd 		return (RET_ERROR);
    492  1.1.1.2  cgd 
    493  1.1.1.2  cgd 	/* If the entry uses overflow pages, make them available for reuse. */
    494      1.1  cgd 	to = bl = GETBLEAF(h, index);
    495      1.1  cgd 	if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
    496      1.1  cgd 		return (RET_ERROR);
    497      1.1  cgd 	if (bl->flags & P_BIGDATA &&
    498      1.1  cgd 	    __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
    499      1.1  cgd 		return (RET_ERROR);
    500      1.1  cgd 
    501  1.1.1.2  cgd 	/* Pack the remaining key/data items at the end of the page. */
    502  1.1.1.2  cgd 	nbytes = NBLEAF(bl);
    503      1.1  cgd 	from = (char *)h + h->upper;
    504      1.1  cgd 	memmove(from + nbytes, from, (char *)to - from);
    505      1.1  cgd 	h->upper += nbytes;
    506      1.1  cgd 
    507  1.1.1.2  cgd 	/* Adjust the indices' offsets, shift the indices down. */
    508      1.1  cgd 	offset = h->linp[index];
    509      1.1  cgd 	for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
    510      1.1  cgd 		if (ip[0] < offset)
    511      1.1  cgd 			ip[0] += nbytes;
    512      1.1  cgd 	for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
    513      1.1  cgd 		ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
    514      1.1  cgd 	h->lower -= sizeof(indx_t);
    515  1.1.1.2  cgd 
    516  1.1.1.2  cgd 	/* If the cursor is on this page, adjust it as necessary. */
    517  1.1.1.2  cgd 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    518  1.1.1.2  cgd 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    519  1.1.1.2  cgd 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index)
    520  1.1.1.2  cgd 		--t->bt_cursor.pg.index;
    521  1.1.1.2  cgd 
    522      1.1  cgd 	return (RET_SUCCESS);
    523  1.1.1.2  cgd }
    524  1.1.1.2  cgd 
    525  1.1.1.2  cgd /*
    526  1.1.1.2  cgd  * __bt_curdel --
    527  1.1.1.2  cgd  *	Delete the cursor.
    528  1.1.1.2  cgd  *
    529  1.1.1.2  cgd  * Parameters:
    530  1.1.1.2  cgd  *	t:	tree
    531  1.1.1.2  cgd  *    key:	referenced key (or NULL)
    532  1.1.1.2  cgd  *	h:	page
    533  1.1.1.2  cgd  *  index:	index on page to delete
    534  1.1.1.2  cgd  *
    535  1.1.1.2  cgd  * Returns:
    536  1.1.1.2  cgd  *	RET_SUCCESS, RET_ERROR.
    537  1.1.1.2  cgd  */
    538  1.1.1.2  cgd static int
    539  1.1.1.2  cgd __bt_curdel(t, key, h, index)
    540  1.1.1.2  cgd 	BTREE *t;
    541  1.1.1.2  cgd 	const DBT *key;
    542  1.1.1.2  cgd 	PAGE *h;
    543  1.1.1.2  cgd 	u_int index;
    544  1.1.1.2  cgd {
    545  1.1.1.2  cgd 	CURSOR *c;
    546  1.1.1.2  cgd 	EPG e;
    547  1.1.1.2  cgd 	PAGE *pg;
    548  1.1.1.2  cgd 	int curcopy, status;
    549  1.1.1.2  cgd 
    550  1.1.1.2  cgd 	/*
    551  1.1.1.2  cgd 	 * If there are duplicates, move forward or backward to one.
    552  1.1.1.2  cgd 	 * Otherwise, copy the key into the cursor area.
    553  1.1.1.2  cgd 	 */
    554  1.1.1.2  cgd 	c = &t->bt_cursor;
    555  1.1.1.2  cgd 	F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
    556  1.1.1.2  cgd 
    557  1.1.1.2  cgd 	curcopy = 0;
    558  1.1.1.2  cgd 	if (!F_ISSET(t, B_NODUPS)) {
    559  1.1.1.2  cgd 		/*
    560  1.1.1.2  cgd 		 * We're going to have to do comparisons.  If we weren't
    561  1.1.1.2  cgd 		 * provided a copy of the key, i.e. the user is deleting
    562  1.1.1.2  cgd 		 * the current cursor position, get one.
    563  1.1.1.2  cgd 		 */
    564  1.1.1.2  cgd 		if (key == NULL) {
    565  1.1.1.2  cgd 			e.page = h;
    566  1.1.1.2  cgd 			e.index = index;
    567  1.1.1.2  cgd 			if ((status = __bt_ret(t, &e,
    568  1.1.1.2  cgd 			    &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
    569  1.1.1.2  cgd 				return (status);
    570  1.1.1.2  cgd 			curcopy = 1;
    571  1.1.1.2  cgd 			key = &c->key;
    572  1.1.1.2  cgd 		}
    573  1.1.1.2  cgd 		/* Check previous key, if not at the beginning of the page. */
    574  1.1.1.2  cgd 		if (index > 0) {
    575  1.1.1.2  cgd 			e.page = h;
    576  1.1.1.2  cgd 			e.index = index - 1;
    577  1.1.1.2  cgd 			if (__bt_cmp(t, key, &e) == 0) {
    578  1.1.1.2  cgd 				F_SET(c, CURS_BEFORE);
    579  1.1.1.2  cgd 				goto dup2;
    580  1.1.1.2  cgd 			}
    581  1.1.1.2  cgd 		}
    582  1.1.1.2  cgd 		/* Check next key, if not at the end of the page. */
    583  1.1.1.2  cgd 		if (index < NEXTINDEX(h) - 1) {
    584  1.1.1.2  cgd 			e.page = h;
    585  1.1.1.2  cgd 			e.index = index + 1;
    586  1.1.1.2  cgd 			if (__bt_cmp(t, key, &e) == 0) {
    587  1.1.1.2  cgd 				F_SET(c, CURS_AFTER);
    588  1.1.1.2  cgd 				goto dup2;
    589  1.1.1.2  cgd 			}
    590  1.1.1.2  cgd 		}
    591  1.1.1.2  cgd 		/* Check previous key if at the beginning of the page. */
    592  1.1.1.2  cgd 		if (index == 0 && h->prevpg != P_INVALID) {
    593  1.1.1.2  cgd 			if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    594  1.1.1.2  cgd 				return (RET_ERROR);
    595  1.1.1.2  cgd 			e.page = pg;
    596  1.1.1.2  cgd 			e.index = NEXTINDEX(pg) - 1;
    597  1.1.1.2  cgd 			if (__bt_cmp(t, key, &e) == 0) {
    598  1.1.1.2  cgd 				F_SET(c, CURS_BEFORE);
    599  1.1.1.2  cgd 				goto dup1;
    600  1.1.1.2  cgd 			}
    601  1.1.1.2  cgd 			mpool_put(t->bt_mp, pg, 0);
    602  1.1.1.2  cgd 		}
    603  1.1.1.2  cgd 		/* Check next key if at the end of the page. */
    604  1.1.1.2  cgd 		if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
    605  1.1.1.2  cgd 			if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    606  1.1.1.2  cgd 				return (RET_ERROR);
    607  1.1.1.2  cgd 			e.page = pg;
    608  1.1.1.2  cgd 			e.index = 0;
    609  1.1.1.2  cgd 			if (__bt_cmp(t, key, &e) == 0) {
    610  1.1.1.2  cgd 				F_SET(c, CURS_AFTER);
    611  1.1.1.2  cgd dup1:				mpool_put(t->bt_mp, pg, 0);
    612  1.1.1.2  cgd dup2:				c->pg.pgno = e.page->pgno;
    613  1.1.1.2  cgd 				c->pg.index = e.index;
    614  1.1.1.2  cgd 				return (RET_SUCCESS);
    615  1.1.1.2  cgd 			}
    616  1.1.1.2  cgd 			mpool_put(t->bt_mp, pg, 0);
    617  1.1.1.2  cgd 		}
    618  1.1.1.2  cgd 	}
    619  1.1.1.2  cgd 	e.page = h;
    620  1.1.1.2  cgd 	e.index = index;
    621  1.1.1.2  cgd 	if (curcopy || (status =
    622  1.1.1.2  cgd 	    __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
    623  1.1.1.2  cgd 		F_SET(c, CURS_ACQUIRE);
    624  1.1.1.2  cgd 		return (RET_SUCCESS);
    625  1.1.1.2  cgd 	}
    626  1.1.1.2  cgd 	return (status);
    627  1.1.1.2  cgd }
    628  1.1.1.2  cgd 
    629  1.1.1.2  cgd /*
    630  1.1.1.2  cgd  * __bt_relink --
    631  1.1.1.2  cgd  *	Link around a deleted page.
    632  1.1.1.2  cgd  *
    633  1.1.1.2  cgd  * Parameters:
    634  1.1.1.2  cgd  *	t:	tree
    635  1.1.1.2  cgd  *	h:	page to be deleted
    636  1.1.1.2  cgd  */
    637  1.1.1.2  cgd static int
    638  1.1.1.2  cgd __bt_relink(t, h)
    639  1.1.1.2  cgd 	BTREE *t;
    640  1.1.1.2  cgd 	PAGE *h;
    641  1.1.1.2  cgd {
    642  1.1.1.2  cgd 	PAGE *pg;
    643  1.1.1.2  cgd 
    644  1.1.1.2  cgd 	if (h->nextpg != P_INVALID) {
    645  1.1.1.2  cgd 		if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    646  1.1.1.2  cgd 			return (RET_ERROR);
    647  1.1.1.2  cgd 		pg->prevpg = h->prevpg;
    648  1.1.1.2  cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    649  1.1.1.2  cgd 	}
    650  1.1.1.2  cgd 	if (h->prevpg != P_INVALID) {
    651  1.1.1.2  cgd 		if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    652  1.1.1.2  cgd 			return (RET_ERROR);
    653  1.1.1.2  cgd 		pg->nextpg = h->nextpg;
    654  1.1.1.2  cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    655  1.1.1.2  cgd 	}
    656  1.1.1.2  cgd 	return (0);
    657      1.1  cgd }
    658