Home | History | Annotate | Line # | Download | only in btree
bt_delete.c revision 1.16.6.2
      1  1.16.6.2  joerg /*	$NetBSD: bt_delete.c,v 1.16.6.2 2008/09/11 12:58:01 joerg Exp $	*/
      2  1.16.6.2  joerg 
      3  1.16.6.2  joerg /*-
      4  1.16.6.2  joerg  * Copyright (c) 1990, 1993, 1994
      5  1.16.6.2  joerg  *	The Regents of the University of California.  All rights reserved.
      6  1.16.6.2  joerg  *
      7  1.16.6.2  joerg  * This code is derived from software contributed to Berkeley by
      8  1.16.6.2  joerg  * Mike Olson.
      9  1.16.6.2  joerg  *
     10  1.16.6.2  joerg  * Redistribution and use in source and binary forms, with or without
     11  1.16.6.2  joerg  * modification, are permitted provided that the following conditions
     12  1.16.6.2  joerg  * are met:
     13  1.16.6.2  joerg  * 1. Redistributions of source code must retain the above copyright
     14  1.16.6.2  joerg  *    notice, this list of conditions and the following disclaimer.
     15  1.16.6.2  joerg  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.16.6.2  joerg  *    notice, this list of conditions and the following disclaimer in the
     17  1.16.6.2  joerg  *    documentation and/or other materials provided with the distribution.
     18  1.16.6.2  joerg  * 3. Neither the name of the University nor the names of its contributors
     19  1.16.6.2  joerg  *    may be used to endorse or promote products derived from this software
     20  1.16.6.2  joerg  *    without specific prior written permission.
     21  1.16.6.2  joerg  *
     22  1.16.6.2  joerg  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23  1.16.6.2  joerg  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24  1.16.6.2  joerg  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25  1.16.6.2  joerg  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26  1.16.6.2  joerg  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27  1.16.6.2  joerg  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28  1.16.6.2  joerg  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29  1.16.6.2  joerg  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30  1.16.6.2  joerg  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31  1.16.6.2  joerg  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32  1.16.6.2  joerg  * SUCH DAMAGE.
     33  1.16.6.2  joerg  */
     34  1.16.6.2  joerg 
     35  1.16.6.2  joerg #if HAVE_NBTOOL_CONFIG_H
     36  1.16.6.2  joerg #include "nbtool_config.h"
     37  1.16.6.2  joerg #endif
     38  1.16.6.2  joerg 
     39  1.16.6.2  joerg #include <sys/cdefs.h>
     40  1.16.6.2  joerg __RCSID("$NetBSD: bt_delete.c,v 1.16.6.2 2008/09/11 12:58:01 joerg Exp $");
     41  1.16.6.2  joerg 
     42  1.16.6.2  joerg #include "namespace.h"
     43  1.16.6.2  joerg #include <sys/types.h>
     44  1.16.6.2  joerg 
     45  1.16.6.2  joerg #include <assert.h>
     46  1.16.6.2  joerg #include <errno.h>
     47  1.16.6.2  joerg #include <stdio.h>
     48  1.16.6.2  joerg #include <string.h>
     49  1.16.6.2  joerg 
     50  1.16.6.2  joerg #include <db.h>
     51  1.16.6.2  joerg #include "btree.h"
     52  1.16.6.2  joerg 
     53  1.16.6.2  joerg static int __bt_bdelete(BTREE *, const DBT *);
     54  1.16.6.2  joerg static int __bt_curdel(BTREE *, const DBT *, PAGE *, u_int);
     55  1.16.6.2  joerg static int __bt_pdelete(BTREE *, PAGE *);
     56  1.16.6.2  joerg static int __bt_relink(BTREE *, PAGE *);
     57  1.16.6.2  joerg static int __bt_stkacq(BTREE *, PAGE **, CURSOR *);
     58  1.16.6.2  joerg 
     59  1.16.6.2  joerg /*
     60  1.16.6.2  joerg  * __bt_delete
     61  1.16.6.2  joerg  *	Delete the item(s) referenced by a key.
     62  1.16.6.2  joerg  *
     63  1.16.6.2  joerg  * Return RET_SPECIAL if the key is not found.
     64  1.16.6.2  joerg  */
     65  1.16.6.2  joerg int
     66  1.16.6.2  joerg __bt_delete(const DB *dbp, const DBT *key, u_int flags)
     67  1.16.6.2  joerg {
     68  1.16.6.2  joerg 	BTREE *t;
     69  1.16.6.2  joerg 	CURSOR *c;
     70  1.16.6.2  joerg 	PAGE *h;
     71  1.16.6.2  joerg 	int status;
     72  1.16.6.2  joerg 
     73  1.16.6.2  joerg 	t = dbp->internal;
     74  1.16.6.2  joerg 
     75  1.16.6.2  joerg 	/* Toss any page pinned across calls. */
     76  1.16.6.2  joerg 	if (t->bt_pinned != NULL) {
     77  1.16.6.2  joerg 		mpool_put(t->bt_mp, t->bt_pinned, 0);
     78  1.16.6.2  joerg 		t->bt_pinned = NULL;
     79  1.16.6.2  joerg 	}
     80  1.16.6.2  joerg 
     81  1.16.6.2  joerg 	/* Check for change to a read-only tree. */
     82  1.16.6.2  joerg 	if (F_ISSET(t, B_RDONLY)) {
     83  1.16.6.2  joerg 		errno = EPERM;
     84  1.16.6.2  joerg 		return (RET_ERROR);
     85  1.16.6.2  joerg 	}
     86  1.16.6.2  joerg 
     87  1.16.6.2  joerg 	switch (flags) {
     88  1.16.6.2  joerg 	case 0:
     89  1.16.6.2  joerg 		status = __bt_bdelete(t, key);
     90  1.16.6.2  joerg 		break;
     91  1.16.6.2  joerg 	case R_CURSOR:
     92  1.16.6.2  joerg 		/*
     93  1.16.6.2  joerg 		 * If flags is R_CURSOR, delete the cursor.  Must already
     94  1.16.6.2  joerg 		 * have started a scan and not have already deleted it.
     95  1.16.6.2  joerg 		 */
     96  1.16.6.2  joerg 		c = &t->bt_cursor;
     97  1.16.6.2  joerg 		if (F_ISSET(c, CURS_INIT)) {
     98  1.16.6.2  joerg 			if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
     99  1.16.6.2  joerg 				return (RET_SPECIAL);
    100  1.16.6.2  joerg 			if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
    101  1.16.6.2  joerg 				return (RET_ERROR);
    102  1.16.6.2  joerg 
    103  1.16.6.2  joerg 			/*
    104  1.16.6.2  joerg 			 * If the page is about to be emptied, we'll need to
    105  1.16.6.2  joerg 			 * delete it, which means we have to acquire a stack.
    106  1.16.6.2  joerg 			 */
    107  1.16.6.2  joerg 			if (NEXTINDEX(h) == 1)
    108  1.16.6.2  joerg 				if (__bt_stkacq(t, &h, &t->bt_cursor))
    109  1.16.6.2  joerg 					return (RET_ERROR);
    110  1.16.6.2  joerg 
    111  1.16.6.2  joerg 			status = __bt_dleaf(t, NULL, h, (u_int)c->pg.index);
    112  1.16.6.2  joerg 
    113  1.16.6.2  joerg 			if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
    114  1.16.6.2  joerg 				if (__bt_pdelete(t, h))
    115  1.16.6.2  joerg 					return (RET_ERROR);
    116  1.16.6.2  joerg 			} else
    117  1.16.6.2  joerg 				mpool_put(t->bt_mp, h,
    118  1.16.6.2  joerg 				    (u_int)(status == RET_SUCCESS ?
    119  1.16.6.2  joerg 				    MPOOL_DIRTY : 0));
    120  1.16.6.2  joerg 			break;
    121  1.16.6.2  joerg 		}
    122  1.16.6.2  joerg 		/* FALLTHROUGH */
    123  1.16.6.2  joerg 	default:
    124  1.16.6.2  joerg 		errno = EINVAL;
    125  1.16.6.2  joerg 		return (RET_ERROR);
    126  1.16.6.2  joerg 	}
    127  1.16.6.2  joerg 	if (status == RET_SUCCESS)
    128  1.16.6.2  joerg 		F_SET(t, B_MODIFIED);
    129  1.16.6.2  joerg 	return (status);
    130  1.16.6.2  joerg }
    131  1.16.6.2  joerg 
    132  1.16.6.2  joerg /*
    133  1.16.6.2  joerg  * __bt_stkacq --
    134  1.16.6.2  joerg  *	Acquire a stack so we can delete a cursor entry.
    135  1.16.6.2  joerg  *
    136  1.16.6.2  joerg  * Parameters:
    137  1.16.6.2  joerg  *	  t:	tree
    138  1.16.6.2  joerg  *	 hp:	pointer to current, pinned PAGE pointer
    139  1.16.6.2  joerg  *	  c:	pointer to the cursor
    140  1.16.6.2  joerg  *
    141  1.16.6.2  joerg  * Returns:
    142  1.16.6.2  joerg  *	0 on success, 1 on failure
    143  1.16.6.2  joerg  */
    144  1.16.6.2  joerg static int
    145  1.16.6.2  joerg __bt_stkacq(BTREE *t, PAGE **hp, CURSOR *c)
    146  1.16.6.2  joerg {
    147  1.16.6.2  joerg 	BINTERNAL *bi;
    148  1.16.6.2  joerg 	EPG *e;
    149  1.16.6.2  joerg 	EPGNO *parent;
    150  1.16.6.2  joerg 	PAGE *h;
    151  1.16.6.2  joerg 	indx_t idx = 0;	/* Pacify gcc */
    152  1.16.6.2  joerg 	pgno_t pgno;
    153  1.16.6.2  joerg 	recno_t nextpg, prevpg;
    154  1.16.6.2  joerg 	int exact, level;
    155  1.16.6.2  joerg 
    156  1.16.6.2  joerg 	/*
    157  1.16.6.2  joerg 	 * Find the first occurrence of the key in the tree.  Toss the
    158  1.16.6.2  joerg 	 * currently locked page so we don't hit an already-locked page.
    159  1.16.6.2  joerg 	 */
    160  1.16.6.2  joerg 	h = *hp;
    161  1.16.6.2  joerg 	mpool_put(t->bt_mp, h, 0);
    162  1.16.6.2  joerg 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    163  1.16.6.2  joerg 		return (1);
    164  1.16.6.2  joerg 	h = e->page;
    165  1.16.6.2  joerg 
    166  1.16.6.2  joerg 	/* See if we got it in one shot. */
    167  1.16.6.2  joerg 	if (h->pgno == c->pg.pgno)
    168  1.16.6.2  joerg 		goto ret;
    169  1.16.6.2  joerg 
    170  1.16.6.2  joerg 	/*
    171  1.16.6.2  joerg 	 * Move right, looking for the page.  At each move we have to move
    172  1.16.6.2  joerg 	 * up the stack until we don't have to move to the next page.  If
    173  1.16.6.2  joerg 	 * we have to change pages at an internal level, we have to fix the
    174  1.16.6.2  joerg 	 * stack back up.
    175  1.16.6.2  joerg 	 */
    176  1.16.6.2  joerg 	while (h->pgno != c->pg.pgno) {
    177  1.16.6.2  joerg 		if ((nextpg = h->nextpg) == P_INVALID)
    178  1.16.6.2  joerg 			break;
    179  1.16.6.2  joerg 		mpool_put(t->bt_mp, h, 0);
    180  1.16.6.2  joerg 
    181  1.16.6.2  joerg 		/* Move up the stack. */
    182  1.16.6.2  joerg 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    183  1.16.6.2  joerg 			/* Get the parent page. */
    184  1.16.6.2  joerg 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    185  1.16.6.2  joerg 				return (1);
    186  1.16.6.2  joerg 
    187  1.16.6.2  joerg 			/* Move to the next index. */
    188  1.16.6.2  joerg 			if (parent->index != NEXTINDEX(h) - 1) {
    189  1.16.6.2  joerg 				idx = parent->index + 1;
    190  1.16.6.2  joerg 				BT_PUSH(t, h->pgno, idx);
    191  1.16.6.2  joerg 				break;
    192  1.16.6.2  joerg 			}
    193  1.16.6.2  joerg 			mpool_put(t->bt_mp, h, 0);
    194  1.16.6.2  joerg 		}
    195  1.16.6.2  joerg 
    196  1.16.6.2  joerg 		/* Restore the stack. */
    197  1.16.6.2  joerg 		while (level--) {
    198  1.16.6.2  joerg 			/* Push the next level down onto the stack. */
    199  1.16.6.2  joerg 			bi = GETBINTERNAL(h, idx);
    200  1.16.6.2  joerg 			pgno = bi->pgno;
    201  1.16.6.2  joerg 			BT_PUSH(t, pgno, 0);
    202  1.16.6.2  joerg 
    203  1.16.6.2  joerg 			/* Lose the currently pinned page. */
    204  1.16.6.2  joerg 			mpool_put(t->bt_mp, h, 0);
    205  1.16.6.2  joerg 
    206  1.16.6.2  joerg 			/* Get the next level down. */
    207  1.16.6.2  joerg 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    208  1.16.6.2  joerg 				return (1);
    209  1.16.6.2  joerg 			idx = 0;
    210  1.16.6.2  joerg 		}
    211  1.16.6.2  joerg 		mpool_put(t->bt_mp, h, 0);
    212  1.16.6.2  joerg 		if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
    213  1.16.6.2  joerg 			return (1);
    214  1.16.6.2  joerg 	}
    215  1.16.6.2  joerg 
    216  1.16.6.2  joerg 	if (h->pgno == c->pg.pgno)
    217  1.16.6.2  joerg 		goto ret;
    218  1.16.6.2  joerg 
    219  1.16.6.2  joerg 	/* Reacquire the original stack. */
    220  1.16.6.2  joerg 	mpool_put(t->bt_mp, h, 0);
    221  1.16.6.2  joerg 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    222  1.16.6.2  joerg 		return (1);
    223  1.16.6.2  joerg 	h = e->page;
    224  1.16.6.2  joerg 
    225  1.16.6.2  joerg 	/*
    226  1.16.6.2  joerg 	 * Move left, looking for the page.  At each move we have to move
    227  1.16.6.2  joerg 	 * up the stack until we don't have to change pages to move to the
    228  1.16.6.2  joerg 	 * next page.  If we have to change pages at an internal level, we
    229  1.16.6.2  joerg 	 * have to fix the stack back up.
    230  1.16.6.2  joerg 	 */
    231  1.16.6.2  joerg 	while (h->pgno != c->pg.pgno) {
    232  1.16.6.2  joerg 		if ((prevpg = h->prevpg) == P_INVALID)
    233  1.16.6.2  joerg 			break;
    234  1.16.6.2  joerg 		mpool_put(t->bt_mp, h, 0);
    235  1.16.6.2  joerg 
    236  1.16.6.2  joerg 		/* Move up the stack. */
    237  1.16.6.2  joerg 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    238  1.16.6.2  joerg 			/* Get the parent page. */
    239  1.16.6.2  joerg 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    240  1.16.6.2  joerg 				return (1);
    241  1.16.6.2  joerg 
    242  1.16.6.2  joerg 			/* Move to the next index. */
    243  1.16.6.2  joerg 			if (parent->index != 0) {
    244  1.16.6.2  joerg 				idx = parent->index - 1;
    245  1.16.6.2  joerg 				BT_PUSH(t, h->pgno, idx);
    246  1.16.6.2  joerg 				break;
    247  1.16.6.2  joerg 			}
    248  1.16.6.2  joerg 			mpool_put(t->bt_mp, h, 0);
    249  1.16.6.2  joerg 		}
    250  1.16.6.2  joerg 
    251  1.16.6.2  joerg 		/* Restore the stack. */
    252  1.16.6.2  joerg 		while (level--) {
    253  1.16.6.2  joerg 			/* Push the next level down onto the stack. */
    254  1.16.6.2  joerg 			bi = GETBINTERNAL(h, idx);
    255  1.16.6.2  joerg 			pgno = bi->pgno;
    256  1.16.6.2  joerg 
    257  1.16.6.2  joerg 			/* Lose the currently pinned page. */
    258  1.16.6.2  joerg 			mpool_put(t->bt_mp, h, 0);
    259  1.16.6.2  joerg 
    260  1.16.6.2  joerg 			/* Get the next level down. */
    261  1.16.6.2  joerg 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    262  1.16.6.2  joerg 				return (1);
    263  1.16.6.2  joerg 
    264  1.16.6.2  joerg 			idx = NEXTINDEX(h) - 1;
    265  1.16.6.2  joerg 			BT_PUSH(t, pgno, idx);
    266  1.16.6.2  joerg 		}
    267  1.16.6.2  joerg 		mpool_put(t->bt_mp, h, 0);
    268  1.16.6.2  joerg 		if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
    269  1.16.6.2  joerg 			return (1);
    270  1.16.6.2  joerg 	}
    271  1.16.6.2  joerg 
    272  1.16.6.2  joerg 
    273  1.16.6.2  joerg ret:	mpool_put(t->bt_mp, h, 0);
    274  1.16.6.2  joerg 	return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
    275  1.16.6.2  joerg }
    276  1.16.6.2  joerg 
    277  1.16.6.2  joerg /*
    278  1.16.6.2  joerg  * __bt_bdelete --
    279  1.16.6.2  joerg  *	Delete all key/data pairs matching the specified key.
    280  1.16.6.2  joerg  *
    281  1.16.6.2  joerg  * Parameters:
    282  1.16.6.2  joerg  *	  t:	tree
    283  1.16.6.2  joerg  *	key:	key to delete
    284  1.16.6.2  joerg  *
    285  1.16.6.2  joerg  * Returns:
    286  1.16.6.2  joerg  *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
    287  1.16.6.2  joerg  */
    288  1.16.6.2  joerg static int
    289  1.16.6.2  joerg __bt_bdelete(BTREE *t, const DBT *key)
    290  1.16.6.2  joerg {
    291  1.16.6.2  joerg 	EPG *e;
    292  1.16.6.2  joerg 	PAGE *h;
    293  1.16.6.2  joerg 	int deleted, exact, redo;
    294  1.16.6.2  joerg 
    295  1.16.6.2  joerg 	deleted = 0;
    296  1.16.6.2  joerg 
    297  1.16.6.2  joerg 	/* Find any matching record; __bt_search pins the page. */
    298  1.16.6.2  joerg loop:	if ((e = __bt_search(t, key, &exact)) == NULL)
    299  1.16.6.2  joerg 		return (deleted ? RET_SUCCESS : RET_ERROR);
    300  1.16.6.2  joerg 	if (!exact) {
    301  1.16.6.2  joerg 		mpool_put(t->bt_mp, e->page, 0);
    302  1.16.6.2  joerg 		return (deleted ? RET_SUCCESS : RET_SPECIAL);
    303  1.16.6.2  joerg 	}
    304  1.16.6.2  joerg 
    305  1.16.6.2  joerg 	/*
    306  1.16.6.2  joerg 	 * Delete forward, then delete backward, from the found key.  If
    307  1.16.6.2  joerg 	 * there are duplicates and we reach either side of the page, do
    308  1.16.6.2  joerg 	 * the key search again, so that we get them all.
    309  1.16.6.2  joerg 	 */
    310  1.16.6.2  joerg 	redo = 0;
    311  1.16.6.2  joerg 	h = e->page;
    312  1.16.6.2  joerg 	do {
    313  1.16.6.2  joerg 		if (__bt_dleaf(t, key, h, (u_int)e->index)) {
    314  1.16.6.2  joerg 			mpool_put(t->bt_mp, h, 0);
    315  1.16.6.2  joerg 			return (RET_ERROR);
    316  1.16.6.2  joerg 		}
    317  1.16.6.2  joerg 		if (F_ISSET(t, B_NODUPS)) {
    318  1.16.6.2  joerg 			if (NEXTINDEX(h) == 0) {
    319  1.16.6.2  joerg 				if (__bt_pdelete(t, h))
    320  1.16.6.2  joerg 					return (RET_ERROR);
    321  1.16.6.2  joerg 			} else
    322  1.16.6.2  joerg 				mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    323  1.16.6.2  joerg 			return (RET_SUCCESS);
    324  1.16.6.2  joerg 		}
    325  1.16.6.2  joerg 		deleted = 1;
    326  1.16.6.2  joerg 	} while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
    327  1.16.6.2  joerg 
    328  1.16.6.2  joerg 	/* Check for right-hand edge of the page. */
    329  1.16.6.2  joerg 	if (e->index == NEXTINDEX(h))
    330  1.16.6.2  joerg 		redo = 1;
    331  1.16.6.2  joerg 
    332  1.16.6.2  joerg 	/* Delete from the key to the beginning of the page. */
    333  1.16.6.2  joerg 	while (e->index-- > 0) {
    334  1.16.6.2  joerg 		if (__bt_cmp(t, key, e) != 0)
    335  1.16.6.2  joerg 			break;
    336  1.16.6.2  joerg 		if (__bt_dleaf(t, key, h, (u_int)e->index) == RET_ERROR) {
    337  1.16.6.2  joerg 			mpool_put(t->bt_mp, h, 0);
    338  1.16.6.2  joerg 			return (RET_ERROR);
    339  1.16.6.2  joerg 		}
    340  1.16.6.2  joerg 		if (e->index == 0)
    341  1.16.6.2  joerg 			redo = 1;
    342  1.16.6.2  joerg 	}
    343  1.16.6.2  joerg 
    344  1.16.6.2  joerg 	/* Check for an empty page. */
    345  1.16.6.2  joerg 	if (NEXTINDEX(h) == 0) {
    346  1.16.6.2  joerg 		if (__bt_pdelete(t, h))
    347  1.16.6.2  joerg 			return (RET_ERROR);
    348  1.16.6.2  joerg 		goto loop;
    349  1.16.6.2  joerg 	}
    350  1.16.6.2  joerg 
    351  1.16.6.2  joerg 	/* Put the page. */
    352  1.16.6.2  joerg 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    353  1.16.6.2  joerg 
    354  1.16.6.2  joerg 	if (redo)
    355  1.16.6.2  joerg 		goto loop;
    356  1.16.6.2  joerg 	return (RET_SUCCESS);
    357  1.16.6.2  joerg }
    358  1.16.6.2  joerg 
    359  1.16.6.2  joerg /*
    360  1.16.6.2  joerg  * __bt_pdelete --
    361  1.16.6.2  joerg  *	Delete a single page from the tree.
    362  1.16.6.2  joerg  *
    363  1.16.6.2  joerg  * Parameters:
    364  1.16.6.2  joerg  *	t:	tree
    365  1.16.6.2  joerg  *	h:	leaf page
    366  1.16.6.2  joerg  *
    367  1.16.6.2  joerg  * Returns:
    368  1.16.6.2  joerg  *	RET_SUCCESS, RET_ERROR.
    369  1.16.6.2  joerg  *
    370  1.16.6.2  joerg  * Side-effects:
    371  1.16.6.2  joerg  *	mpool_put's the page
    372  1.16.6.2  joerg  */
    373  1.16.6.2  joerg static int
    374  1.16.6.2  joerg __bt_pdelete(BTREE *t, PAGE *h)
    375  1.16.6.2  joerg {
    376  1.16.6.2  joerg 	BINTERNAL *bi;
    377  1.16.6.2  joerg 	PAGE *pg;
    378  1.16.6.2  joerg 	EPGNO *parent;
    379  1.16.6.2  joerg 	indx_t cnt, idx, *ip, offset;
    380  1.16.6.2  joerg 	uint32_t nksize;
    381  1.16.6.2  joerg 	char *from;
    382  1.16.6.2  joerg 
    383  1.16.6.2  joerg 	/*
    384  1.16.6.2  joerg 	 * Walk the parent page stack -- a LIFO stack of the pages that were
    385  1.16.6.2  joerg 	 * traversed when we searched for the page where the delete occurred.
    386  1.16.6.2  joerg 	 * Each stack entry is a page number and a page index offset.  The
    387  1.16.6.2  joerg 	 * offset is for the page traversed on the search.  We've just deleted
    388  1.16.6.2  joerg 	 * a page, so we have to delete the key from the parent page.
    389  1.16.6.2  joerg 	 *
    390  1.16.6.2  joerg 	 * If the delete from the parent page makes it empty, this process may
    391  1.16.6.2  joerg 	 * continue all the way up the tree.  We stop if we reach the root page
    392  1.16.6.2  joerg 	 * (which is never deleted, it's just not worth the effort) or if the
    393  1.16.6.2  joerg 	 * delete does not empty the page.
    394  1.16.6.2  joerg 	 */
    395  1.16.6.2  joerg 	while ((parent = BT_POP(t)) != NULL) {
    396  1.16.6.2  joerg 		/* Get the parent page. */
    397  1.16.6.2  joerg 		if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    398  1.16.6.2  joerg 			return (RET_ERROR);
    399  1.16.6.2  joerg 
    400  1.16.6.2  joerg 		idx = parent->index;
    401  1.16.6.2  joerg 		bi = GETBINTERNAL(pg, idx);
    402  1.16.6.2  joerg 
    403  1.16.6.2  joerg 		/* Free any overflow pages. */
    404  1.16.6.2  joerg 		if (bi->flags & P_BIGKEY &&
    405  1.16.6.2  joerg 		    __ovfl_delete(t, bi->bytes) == RET_ERROR) {
    406  1.16.6.2  joerg 			mpool_put(t->bt_mp, pg, 0);
    407  1.16.6.2  joerg 			return (RET_ERROR);
    408  1.16.6.2  joerg 		}
    409  1.16.6.2  joerg 
    410  1.16.6.2  joerg 		/*
    411  1.16.6.2  joerg 		 * Free the parent if it has only the one key and it's not the
    412  1.16.6.2  joerg 		 * root page. If it's the rootpage, turn it back into an empty
    413  1.16.6.2  joerg 		 * leaf page.
    414  1.16.6.2  joerg 		 */
    415  1.16.6.2  joerg 		if (NEXTINDEX(pg) == 1) {
    416  1.16.6.2  joerg 			if (pg->pgno == P_ROOT) {
    417  1.16.6.2  joerg 				pg->lower = BTDATAOFF;
    418  1.16.6.2  joerg 				pg->upper = t->bt_psize;
    419  1.16.6.2  joerg 				pg->flags = P_BLEAF;
    420  1.16.6.2  joerg 			} else {
    421  1.16.6.2  joerg 				if (__bt_relink(t, pg) || __bt_free(t, pg))
    422  1.16.6.2  joerg 					return (RET_ERROR);
    423  1.16.6.2  joerg 				continue;
    424  1.16.6.2  joerg 			}
    425  1.16.6.2  joerg 		} else {
    426  1.16.6.2  joerg 			/* Pack remaining key items at the end of the page. */
    427  1.16.6.2  joerg 			nksize = NBINTERNAL(bi->ksize);
    428  1.16.6.2  joerg 			from = (char *)(void *)pg + pg->upper;
    429  1.16.6.2  joerg 			memmove(from + nksize, from,
    430  1.16.6.2  joerg 			(size_t)((char *)(void *)bi - from));
    431  1.16.6.2  joerg 			pg->upper += nksize;
    432  1.16.6.2  joerg 
    433  1.16.6.2  joerg 			/* Adjust indices' offsets, shift the indices down. */
    434  1.16.6.2  joerg 			offset = pg->linp[idx];
    435  1.16.6.2  joerg 			for (cnt = idx, ip = &pg->linp[0]; cnt--; ++ip)
    436  1.16.6.2  joerg 				if (ip[0] < offset)
    437  1.16.6.2  joerg 					ip[0] += nksize;
    438  1.16.6.2  joerg 			for (cnt = NEXTINDEX(pg) - idx; --cnt; ++ip)
    439  1.16.6.2  joerg 				ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
    440  1.16.6.2  joerg 			pg->lower -= sizeof(indx_t);
    441  1.16.6.2  joerg 		}
    442  1.16.6.2  joerg 
    443  1.16.6.2  joerg 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    444  1.16.6.2  joerg 		break;
    445  1.16.6.2  joerg 	}
    446  1.16.6.2  joerg 
    447  1.16.6.2  joerg 	/* Free the leaf page, as long as it wasn't the root. */
    448  1.16.6.2  joerg 	if (h->pgno == P_ROOT) {
    449  1.16.6.2  joerg 		mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    450  1.16.6.2  joerg 		return (RET_SUCCESS);
    451  1.16.6.2  joerg 	}
    452  1.16.6.2  joerg 	return (__bt_relink(t, h) || __bt_free(t, h));
    453  1.16.6.2  joerg }
    454  1.16.6.2  joerg 
    455  1.16.6.2  joerg /*
    456  1.16.6.2  joerg  * __bt_dleaf --
    457  1.16.6.2  joerg  *	Delete a single record from a leaf page.
    458  1.16.6.2  joerg  *
    459  1.16.6.2  joerg  * Parameters:
    460  1.16.6.2  joerg  *	t:	tree
    461  1.16.6.2  joerg  *    key:	referenced key
    462  1.16.6.2  joerg  *	h:	page
    463  1.16.6.2  joerg  *	idx:	index on page to delete
    464  1.16.6.2  joerg  *
    465  1.16.6.2  joerg  * Returns:
    466  1.16.6.2  joerg  *	RET_SUCCESS, RET_ERROR.
    467  1.16.6.2  joerg  */
    468  1.16.6.2  joerg int
    469  1.16.6.2  joerg __bt_dleaf(BTREE *t, const DBT *key, PAGE *h, u_int idx)
    470  1.16.6.2  joerg {
    471  1.16.6.2  joerg 	BLEAF *bl;
    472  1.16.6.2  joerg 	indx_t cnt, *ip, offset;
    473  1.16.6.2  joerg 	uint32_t nbytes;
    474  1.16.6.2  joerg 	void *to;
    475  1.16.6.2  joerg 	char *from;
    476  1.16.6.2  joerg 
    477  1.16.6.2  joerg 	/* If this record is referenced by the cursor, delete the cursor. */
    478  1.16.6.2  joerg 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    479  1.16.6.2  joerg 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    480  1.16.6.2  joerg 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == idx &&
    481  1.16.6.2  joerg 	    __bt_curdel(t, key, h, idx))
    482  1.16.6.2  joerg 		return (RET_ERROR);
    483  1.16.6.2  joerg 
    484  1.16.6.2  joerg 	/* If the entry uses overflow pages, make them available for reuse. */
    485  1.16.6.2  joerg 	to = bl = GETBLEAF(h, idx);
    486  1.16.6.2  joerg 	if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
    487  1.16.6.2  joerg 		return (RET_ERROR);
    488  1.16.6.2  joerg 	if (bl->flags & P_BIGDATA &&
    489  1.16.6.2  joerg 	    __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
    490  1.16.6.2  joerg 		return (RET_ERROR);
    491  1.16.6.2  joerg 
    492  1.16.6.2  joerg 	/* Pack the remaining key/data items at the end of the page. */
    493  1.16.6.2  joerg 	nbytes = NBLEAF(bl);
    494  1.16.6.2  joerg 	from = (char *)(void *)h + h->upper;
    495  1.16.6.2  joerg 	memmove(from + nbytes, from, (size_t)((char *)(void *)to - from));
    496  1.16.6.2  joerg 	h->upper += nbytes;
    497  1.16.6.2  joerg 
    498  1.16.6.2  joerg 	/* Adjust the indices' offsets, shift the indices down. */
    499  1.16.6.2  joerg 	offset = h->linp[idx];
    500  1.16.6.2  joerg 	for (cnt = idx, ip = &h->linp[0]; cnt--; ++ip)
    501  1.16.6.2  joerg 		if (ip[0] < offset)
    502  1.16.6.2  joerg 			ip[0] += nbytes;
    503  1.16.6.2  joerg 	for (cnt = NEXTINDEX(h) - idx; --cnt; ++ip)
    504  1.16.6.2  joerg 		ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
    505  1.16.6.2  joerg 	h->lower -= sizeof(indx_t);
    506  1.16.6.2  joerg 
    507  1.16.6.2  joerg 	/* If the cursor is on this page, adjust it as necessary. */
    508  1.16.6.2  joerg 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    509  1.16.6.2  joerg 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    510  1.16.6.2  joerg 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > idx)
    511  1.16.6.2  joerg 		--t->bt_cursor.pg.index;
    512  1.16.6.2  joerg 
    513  1.16.6.2  joerg 	return (RET_SUCCESS);
    514  1.16.6.2  joerg }
    515  1.16.6.2  joerg 
    516  1.16.6.2  joerg /*
    517  1.16.6.2  joerg  * __bt_curdel --
    518  1.16.6.2  joerg  *	Delete the cursor.
    519  1.16.6.2  joerg  *
    520  1.16.6.2  joerg  * Parameters:
    521  1.16.6.2  joerg  *	t:	tree
    522  1.16.6.2  joerg  *    key:	referenced key (or NULL)
    523  1.16.6.2  joerg  *	h:	page
    524  1.16.6.2  joerg  *  idx:	index on page to delete
    525  1.16.6.2  joerg  *
    526  1.16.6.2  joerg  * Returns:
    527  1.16.6.2  joerg  *	RET_SUCCESS, RET_ERROR.
    528  1.16.6.2  joerg  */
    529  1.16.6.2  joerg static int
    530  1.16.6.2  joerg __bt_curdel(BTREE *t, const DBT *key, PAGE *h, u_int idx)
    531  1.16.6.2  joerg {
    532  1.16.6.2  joerg 	CURSOR *c;
    533  1.16.6.2  joerg 	EPG e;
    534  1.16.6.2  joerg 	PAGE *pg;
    535  1.16.6.2  joerg 	int curcopy, status;
    536  1.16.6.2  joerg 
    537  1.16.6.2  joerg 	/*
    538  1.16.6.2  joerg 	 * If there are duplicates, move forward or backward to one.
    539  1.16.6.2  joerg 	 * Otherwise, copy the key into the cursor area.
    540  1.16.6.2  joerg 	 */
    541  1.16.6.2  joerg 	c = &t->bt_cursor;
    542  1.16.6.2  joerg 	F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
    543  1.16.6.2  joerg 
    544  1.16.6.2  joerg 	curcopy = 0;
    545  1.16.6.2  joerg 	if (!F_ISSET(t, B_NODUPS)) {
    546  1.16.6.2  joerg 		/*
    547  1.16.6.2  joerg 		 * We're going to have to do comparisons.  If we weren't
    548  1.16.6.2  joerg 		 * provided a copy of the key, i.e. the user is deleting
    549  1.16.6.2  joerg 		 * the current cursor position, get one.
    550  1.16.6.2  joerg 		 */
    551  1.16.6.2  joerg 		if (key == NULL) {
    552  1.16.6.2  joerg 			e.page = h;
    553  1.16.6.2  joerg 			e.index = idx;
    554  1.16.6.2  joerg 			if ((status = __bt_ret(t, &e,
    555  1.16.6.2  joerg 			    &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
    556  1.16.6.2  joerg 				return (status);
    557  1.16.6.2  joerg 			curcopy = 1;
    558  1.16.6.2  joerg 			key = &c->key;
    559  1.16.6.2  joerg 		}
    560  1.16.6.2  joerg 		/* Check previous key, if not at the beginning of the page. */
    561  1.16.6.2  joerg 		if (idx > 0) {
    562  1.16.6.2  joerg 			e.page = h;
    563  1.16.6.2  joerg 			e.index = idx - 1;
    564  1.16.6.2  joerg 			if (__bt_cmp(t, key, &e) == 0) {
    565  1.16.6.2  joerg 				F_SET(c, CURS_BEFORE);
    566  1.16.6.2  joerg 				goto dup2;
    567  1.16.6.2  joerg 			}
    568  1.16.6.2  joerg 		}
    569  1.16.6.2  joerg 		/* Check next key, if not at the end of the page. */
    570  1.16.6.2  joerg 		if (idx < NEXTINDEX(h) - 1) {
    571  1.16.6.2  joerg 			e.page = h;
    572  1.16.6.2  joerg 			e.index = idx + 1;
    573  1.16.6.2  joerg 			if (__bt_cmp(t, key, &e) == 0) {
    574  1.16.6.2  joerg 				F_SET(c, CURS_AFTER);
    575  1.16.6.2  joerg 				goto dup2;
    576  1.16.6.2  joerg 			}
    577  1.16.6.2  joerg 		}
    578  1.16.6.2  joerg 		/* Check previous key if at the beginning of the page. */
    579  1.16.6.2  joerg 		if (idx == 0 && h->prevpg != P_INVALID) {
    580  1.16.6.2  joerg 			if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    581  1.16.6.2  joerg 				return (RET_ERROR);
    582  1.16.6.2  joerg 			e.page = pg;
    583  1.16.6.2  joerg 			e.index = NEXTINDEX(pg) - 1;
    584  1.16.6.2  joerg 			if (__bt_cmp(t, key, &e) == 0) {
    585  1.16.6.2  joerg 				F_SET(c, CURS_BEFORE);
    586  1.16.6.2  joerg 				goto dup1;
    587  1.16.6.2  joerg 			}
    588  1.16.6.2  joerg 			mpool_put(t->bt_mp, pg, 0);
    589  1.16.6.2  joerg 		}
    590  1.16.6.2  joerg 		/* Check next key if at the end of the page. */
    591  1.16.6.2  joerg 		if (idx == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
    592  1.16.6.2  joerg 			if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    593  1.16.6.2  joerg 				return (RET_ERROR);
    594  1.16.6.2  joerg 			e.page = pg;
    595  1.16.6.2  joerg 			e.index = 0;
    596  1.16.6.2  joerg 			if (__bt_cmp(t, key, &e) == 0) {
    597  1.16.6.2  joerg 				F_SET(c, CURS_AFTER);
    598  1.16.6.2  joerg dup1:				mpool_put(t->bt_mp, pg, 0);
    599  1.16.6.2  joerg dup2:				c->pg.pgno = e.page->pgno;
    600  1.16.6.2  joerg 				c->pg.index = e.index;
    601  1.16.6.2  joerg 				return (RET_SUCCESS);
    602  1.16.6.2  joerg 			}
    603  1.16.6.2  joerg 			mpool_put(t->bt_mp, pg, 0);
    604  1.16.6.2  joerg 		}
    605  1.16.6.2  joerg 	}
    606  1.16.6.2  joerg 	e.page = h;
    607  1.16.6.2  joerg 	e.index = idx;
    608  1.16.6.2  joerg 	if (curcopy || (status =
    609  1.16.6.2  joerg 	    __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
    610  1.16.6.2  joerg 		F_SET(c, CURS_ACQUIRE);
    611  1.16.6.2  joerg 		return (RET_SUCCESS);
    612  1.16.6.2  joerg 	}
    613  1.16.6.2  joerg 	return (status);
    614  1.16.6.2  joerg }
    615  1.16.6.2  joerg 
    616  1.16.6.2  joerg /*
    617  1.16.6.2  joerg  * __bt_relink --
    618  1.16.6.2  joerg  *	Link around a deleted page.
    619  1.16.6.2  joerg  *
    620  1.16.6.2  joerg  * Parameters:
    621  1.16.6.2  joerg  *	t:	tree
    622  1.16.6.2  joerg  *	h:	page to be deleted
    623  1.16.6.2  joerg  */
    624  1.16.6.2  joerg static int
    625  1.16.6.2  joerg __bt_relink(BTREE *t, PAGE *h)
    626  1.16.6.2  joerg {
    627  1.16.6.2  joerg 	PAGE *pg;
    628  1.16.6.2  joerg 
    629  1.16.6.2  joerg 	if (h->nextpg != P_INVALID) {
    630  1.16.6.2  joerg 		if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    631  1.16.6.2  joerg 			return (RET_ERROR);
    632  1.16.6.2  joerg 		pg->prevpg = h->prevpg;
    633  1.16.6.2  joerg 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    634  1.16.6.2  joerg 	}
    635  1.16.6.2  joerg 	if (h->prevpg != P_INVALID) {
    636  1.16.6.2  joerg 		if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    637  1.16.6.2  joerg 			return (RET_ERROR);
    638  1.16.6.2  joerg 		pg->nextpg = h->nextpg;
    639  1.16.6.2  joerg 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    640  1.16.6.2  joerg 	}
    641  1.16.6.2  joerg 	return (0);
    642  1.16.6.2  joerg }
    643