Home | History | Annotate | Line # | Download | only in btree
bt_delete.c revision 1.13.12.1
      1  1.13.12.1  wrstuden /*	$NetBSD: bt_delete.c,v 1.13.12.1 2008/09/18 04:39:20 wrstuden Exp $	*/
      2        1.6       cgd 
      3        1.1       cgd /*-
      4        1.5       cgd  * Copyright (c) 1990, 1993, 1994
      5        1.1       cgd  *	The Regents of the University of California.  All rights reserved.
      6        1.1       cgd  *
      7        1.1       cgd  * This code is derived from software contributed to Berkeley by
      8        1.1       cgd  * Mike Olson.
      9        1.1       cgd  *
     10        1.1       cgd  * Redistribution and use in source and binary forms, with or without
     11        1.1       cgd  * modification, are permitted provided that the following conditions
     12        1.1       cgd  * are met:
     13        1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     14        1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     15        1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     16        1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     17        1.1       cgd  *    documentation and/or other materials provided with the distribution.
     18       1.12       agc  * 3. Neither the name of the University nor the names of its contributors
     19        1.1       cgd  *    may be used to endorse or promote products derived from this software
     20        1.1       cgd  *    without specific prior written permission.
     21        1.1       cgd  *
     22        1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23        1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24        1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25        1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26        1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27        1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28        1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29        1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30        1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31        1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32        1.1       cgd  * SUCH DAMAGE.
     33        1.1       cgd  */
     34        1.1       cgd 
     35        1.8  christos #include <sys/cdefs.h>
     36        1.1       cgd #if defined(LIBC_SCCS) && !defined(lint)
     37        1.6       cgd #if 0
     38        1.7       cgd static char sccsid[] = "@(#)bt_delete.c	8.13 (Berkeley) 7/28/94";
     39        1.6       cgd #else
     40  1.13.12.1  wrstuden __RCSID("$NetBSD: bt_delete.c,v 1.13.12.1 2008/09/18 04:39:20 wrstuden Exp $");
     41        1.6       cgd #endif
     42        1.1       cgd #endif /* LIBC_SCCS and not lint */
     43        1.1       cgd 
     44        1.9       jtc #include "namespace.h"
     45        1.1       cgd #include <sys/types.h>
     46        1.1       cgd 
     47       1.13  christos #include <assert.h>
     48        1.1       cgd #include <errno.h>
     49        1.1       cgd #include <stdio.h>
     50        1.1       cgd #include <string.h>
     51        1.1       cgd 
     52        1.1       cgd #include <db.h>
     53        1.1       cgd #include "btree.h"
     54        1.1       cgd 
     55       1.13  christos static int __bt_bdelete(BTREE *, const DBT *);
     56       1.13  christos static int __bt_curdel(BTREE *, const DBT *, PAGE *, u_int);
     57       1.13  christos static int __bt_pdelete(BTREE *, PAGE *);
     58       1.13  christos static int __bt_relink(BTREE *, PAGE *);
     59       1.13  christos static int __bt_stkacq(BTREE *, PAGE **, CURSOR *);
     60        1.1       cgd 
     61        1.1       cgd /*
     62        1.7       cgd  * __bt_delete
     63        1.7       cgd  *	Delete the item(s) referenced by a key.
     64        1.1       cgd  *
     65        1.7       cgd  * Return RET_SPECIAL if the key is not found.
     66        1.1       cgd  */
     67        1.1       cgd int
     68       1.13  christos __bt_delete(const DB *dbp, const DBT *key, u_int flags)
     69        1.1       cgd {
     70        1.1       cgd 	BTREE *t;
     71        1.7       cgd 	CURSOR *c;
     72        1.7       cgd 	PAGE *h;
     73        1.1       cgd 	int status;
     74        1.1       cgd 
     75        1.1       cgd 	t = dbp->internal;
     76        1.4       cgd 
     77        1.4       cgd 	/* Toss any page pinned across calls. */
     78        1.4       cgd 	if (t->bt_pinned != NULL) {
     79        1.4       cgd 		mpool_put(t->bt_mp, t->bt_pinned, 0);
     80        1.4       cgd 		t->bt_pinned = NULL;
     81        1.4       cgd 	}
     82        1.4       cgd 
     83        1.7       cgd 	/* Check for change to a read-only tree. */
     84        1.7       cgd 	if (F_ISSET(t, B_RDONLY)) {
     85        1.1       cgd 		errno = EPERM;
     86        1.1       cgd 		return (RET_ERROR);
     87        1.1       cgd 	}
     88        1.4       cgd 
     89        1.7       cgd 	switch (flags) {
     90        1.1       cgd 	case 0:
     91        1.7       cgd 		status = __bt_bdelete(t, key);
     92        1.1       cgd 		break;
     93        1.1       cgd 	case R_CURSOR:
     94        1.1       cgd 		/*
     95        1.7       cgd 		 * If flags is R_CURSOR, delete the cursor.  Must already
     96        1.7       cgd 		 * have started a scan and not have already deleted it.
     97        1.1       cgd 		 */
     98        1.7       cgd 		c = &t->bt_cursor;
     99        1.7       cgd 		if (F_ISSET(c, CURS_INIT)) {
    100        1.7       cgd 			if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
    101        1.7       cgd 				return (RET_SPECIAL);
    102        1.7       cgd 			if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
    103        1.7       cgd 				return (RET_ERROR);
    104        1.7       cgd 
    105        1.7       cgd 			/*
    106        1.7       cgd 			 * If the page is about to be emptied, we'll need to
    107        1.7       cgd 			 * delete it, which means we have to acquire a stack.
    108        1.7       cgd 			 */
    109        1.7       cgd 			if (NEXTINDEX(h) == 1)
    110        1.7       cgd 				if (__bt_stkacq(t, &h, &t->bt_cursor))
    111        1.7       cgd 					return (RET_ERROR);
    112        1.7       cgd 
    113       1.10  christos 			status = __bt_dleaf(t, NULL, h, (u_int)c->pg.index);
    114        1.7       cgd 
    115        1.7       cgd 			if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
    116        1.7       cgd 				if (__bt_pdelete(t, h))
    117        1.7       cgd 					return (RET_ERROR);
    118        1.7       cgd 			} else
    119       1.10  christos 				mpool_put(t->bt_mp, h,
    120       1.10  christos 				    (u_int)(status == RET_SUCCESS ?
    121       1.10  christos 				    MPOOL_DIRTY : 0));
    122        1.7       cgd 			break;
    123        1.7       cgd 		}
    124        1.7       cgd 		/* FALLTHROUGH */
    125        1.1       cgd 	default:
    126        1.7       cgd 		errno = EINVAL;
    127        1.1       cgd 		return (RET_ERROR);
    128        1.1       cgd 	}
    129        1.1       cgd 	if (status == RET_SUCCESS)
    130        1.7       cgd 		F_SET(t, B_MODIFIED);
    131        1.1       cgd 	return (status);
    132        1.1       cgd }
    133        1.1       cgd 
    134        1.1       cgd /*
    135        1.7       cgd  * __bt_stkacq --
    136        1.7       cgd  *	Acquire a stack so we can delete a cursor entry.
    137        1.7       cgd  *
    138        1.7       cgd  * Parameters:
    139        1.7       cgd  *	  t:	tree
    140        1.7       cgd  *	 hp:	pointer to current, pinned PAGE pointer
    141        1.7       cgd  *	  c:	pointer to the cursor
    142        1.7       cgd  *
    143        1.7       cgd  * Returns:
    144        1.7       cgd  *	0 on success, 1 on failure
    145        1.7       cgd  */
    146        1.7       cgd static int
    147       1.13  christos __bt_stkacq(BTREE *t, PAGE **hp, CURSOR *c)
    148        1.7       cgd {
    149        1.7       cgd 	BINTERNAL *bi;
    150        1.7       cgd 	EPG *e;
    151        1.7       cgd 	EPGNO *parent;
    152        1.7       cgd 	PAGE *h;
    153       1.10  christos 	indx_t idx = 0;	/* Pacify gcc */
    154        1.7       cgd 	pgno_t pgno;
    155        1.7       cgd 	recno_t nextpg, prevpg;
    156        1.7       cgd 	int exact, level;
    157        1.7       cgd 
    158        1.7       cgd 	/*
    159        1.7       cgd 	 * Find the first occurrence of the key in the tree.  Toss the
    160        1.7       cgd 	 * currently locked page so we don't hit an already-locked page.
    161        1.7       cgd 	 */
    162        1.7       cgd 	h = *hp;
    163        1.7       cgd 	mpool_put(t->bt_mp, h, 0);
    164        1.7       cgd 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    165        1.7       cgd 		return (1);
    166        1.7       cgd 	h = e->page;
    167        1.7       cgd 
    168        1.7       cgd 	/* See if we got it in one shot. */
    169        1.7       cgd 	if (h->pgno == c->pg.pgno)
    170        1.7       cgd 		goto ret;
    171        1.7       cgd 
    172        1.7       cgd 	/*
    173        1.7       cgd 	 * Move right, looking for the page.  At each move we have to move
    174        1.7       cgd 	 * up the stack until we don't have to move to the next page.  If
    175        1.7       cgd 	 * we have to change pages at an internal level, we have to fix the
    176        1.7       cgd 	 * stack back up.
    177        1.7       cgd 	 */
    178        1.7       cgd 	while (h->pgno != c->pg.pgno) {
    179        1.7       cgd 		if ((nextpg = h->nextpg) == P_INVALID)
    180        1.7       cgd 			break;
    181        1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    182        1.7       cgd 
    183        1.7       cgd 		/* Move up the stack. */
    184        1.7       cgd 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    185        1.7       cgd 			/* Get the parent page. */
    186        1.7       cgd 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    187        1.7       cgd 				return (1);
    188        1.7       cgd 
    189        1.7       cgd 			/* Move to the next index. */
    190        1.7       cgd 			if (parent->index != NEXTINDEX(h) - 1) {
    191       1.10  christos 				idx = parent->index + 1;
    192       1.10  christos 				BT_PUSH(t, h->pgno, idx);
    193        1.7       cgd 				break;
    194        1.7       cgd 			}
    195        1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    196        1.7       cgd 		}
    197        1.7       cgd 
    198        1.7       cgd 		/* Restore the stack. */
    199        1.7       cgd 		while (level--) {
    200        1.7       cgd 			/* Push the next level down onto the stack. */
    201       1.10  christos 			bi = GETBINTERNAL(h, idx);
    202        1.7       cgd 			pgno = bi->pgno;
    203        1.7       cgd 			BT_PUSH(t, pgno, 0);
    204        1.7       cgd 
    205        1.7       cgd 			/* Lose the currently pinned page. */
    206        1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    207        1.7       cgd 
    208        1.7       cgd 			/* Get the next level down. */
    209        1.7       cgd 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    210        1.7       cgd 				return (1);
    211       1.10  christos 			idx = 0;
    212        1.7       cgd 		}
    213        1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    214        1.7       cgd 		if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
    215        1.7       cgd 			return (1);
    216        1.7       cgd 	}
    217        1.7       cgd 
    218        1.7       cgd 	if (h->pgno == c->pg.pgno)
    219        1.7       cgd 		goto ret;
    220        1.7       cgd 
    221        1.7       cgd 	/* Reacquire the original stack. */
    222        1.7       cgd 	mpool_put(t->bt_mp, h, 0);
    223        1.7       cgd 	if ((e = __bt_search(t, &c->key, &exact)) == NULL)
    224        1.7       cgd 		return (1);
    225        1.7       cgd 	h = e->page;
    226        1.7       cgd 
    227        1.7       cgd 	/*
    228        1.7       cgd 	 * Move left, looking for the page.  At each move we have to move
    229        1.7       cgd 	 * up the stack until we don't have to change pages to move to the
    230        1.7       cgd 	 * next page.  If we have to change pages at an internal level, we
    231        1.7       cgd 	 * have to fix the stack back up.
    232        1.7       cgd 	 */
    233        1.7       cgd 	while (h->pgno != c->pg.pgno) {
    234        1.7       cgd 		if ((prevpg = h->prevpg) == P_INVALID)
    235        1.7       cgd 			break;
    236        1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    237        1.7       cgd 
    238        1.7       cgd 		/* Move up the stack. */
    239        1.7       cgd 		for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
    240        1.7       cgd 			/* Get the parent page. */
    241        1.7       cgd 			if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    242        1.7       cgd 				return (1);
    243        1.7       cgd 
    244        1.7       cgd 			/* Move to the next index. */
    245        1.7       cgd 			if (parent->index != 0) {
    246       1.10  christos 				idx = parent->index - 1;
    247       1.10  christos 				BT_PUSH(t, h->pgno, idx);
    248        1.7       cgd 				break;
    249        1.7       cgd 			}
    250        1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    251        1.7       cgd 		}
    252        1.7       cgd 
    253        1.7       cgd 		/* Restore the stack. */
    254        1.7       cgd 		while (level--) {
    255        1.7       cgd 			/* Push the next level down onto the stack. */
    256       1.10  christos 			bi = GETBINTERNAL(h, idx);
    257        1.7       cgd 			pgno = bi->pgno;
    258        1.7       cgd 
    259        1.7       cgd 			/* Lose the currently pinned page. */
    260        1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    261        1.7       cgd 
    262        1.7       cgd 			/* Get the next level down. */
    263        1.7       cgd 			if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
    264        1.7       cgd 				return (1);
    265        1.7       cgd 
    266       1.10  christos 			idx = NEXTINDEX(h) - 1;
    267       1.10  christos 			BT_PUSH(t, pgno, idx);
    268        1.7       cgd 		}
    269        1.7       cgd 		mpool_put(t->bt_mp, h, 0);
    270        1.7       cgd 		if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
    271        1.7       cgd 			return (1);
    272        1.7       cgd 	}
    273        1.7       cgd 
    274        1.7       cgd 
    275        1.7       cgd ret:	mpool_put(t->bt_mp, h, 0);
    276        1.7       cgd 	return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
    277        1.7       cgd }
    278        1.7       cgd 
    279        1.7       cgd /*
    280        1.7       cgd  * __bt_bdelete --
    281        1.7       cgd  *	Delete all key/data pairs matching the specified key.
    282        1.1       cgd  *
    283        1.1       cgd  * Parameters:
    284        1.7       cgd  *	  t:	tree
    285        1.1       cgd  *	key:	key to delete
    286        1.1       cgd  *
    287        1.1       cgd  * Returns:
    288        1.1       cgd  *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
    289        1.1       cgd  */
    290        1.1       cgd static int
    291       1.13  christos __bt_bdelete(BTREE *t, const DBT *key)
    292        1.1       cgd {
    293        1.7       cgd 	EPG *e;
    294        1.1       cgd 	PAGE *h;
    295        1.7       cgd 	int deleted, exact, redo;
    296        1.7       cgd 
    297        1.7       cgd 	deleted = 0;
    298        1.1       cgd 
    299        1.1       cgd 	/* Find any matching record; __bt_search pins the page. */
    300        1.7       cgd loop:	if ((e = __bt_search(t, key, &exact)) == NULL)
    301        1.7       cgd 		return (deleted ? RET_SUCCESS : RET_ERROR);
    302        1.1       cgd 	if (!exact) {
    303        1.1       cgd 		mpool_put(t->bt_mp, e->page, 0);
    304        1.7       cgd 		return (deleted ? RET_SUCCESS : RET_SPECIAL);
    305        1.1       cgd 	}
    306        1.1       cgd 
    307        1.1       cgd 	/*
    308        1.7       cgd 	 * Delete forward, then delete backward, from the found key.  If
    309        1.7       cgd 	 * there are duplicates and we reach either side of the page, do
    310        1.7       cgd 	 * the key search again, so that we get them all.
    311        1.1       cgd 	 */
    312        1.7       cgd 	redo = 0;
    313        1.7       cgd 	h = e->page;
    314        1.7       cgd 	do {
    315       1.10  christos 		if (__bt_dleaf(t, key, h, (u_int)e->index)) {
    316        1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    317        1.7       cgd 			return (RET_ERROR);
    318        1.7       cgd 		}
    319        1.7       cgd 		if (F_ISSET(t, B_NODUPS)) {
    320        1.7       cgd 			if (NEXTINDEX(h) == 0) {
    321        1.7       cgd 				if (__bt_pdelete(t, h))
    322        1.1       cgd 					return (RET_ERROR);
    323        1.7       cgd 			} else
    324        1.7       cgd 				mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    325        1.7       cgd 			return (RET_SUCCESS);
    326        1.7       cgd 		}
    327        1.7       cgd 		deleted = 1;
    328        1.7       cgd 	} while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
    329        1.7       cgd 
    330        1.7       cgd 	/* Check for right-hand edge of the page. */
    331        1.7       cgd 	if (e->index == NEXTINDEX(h))
    332        1.7       cgd 		redo = 1;
    333        1.1       cgd 
    334        1.7       cgd 	/* Delete from the key to the beginning of the page. */
    335        1.7       cgd 	while (e->index-- > 0) {
    336        1.7       cgd 		if (__bt_cmp(t, key, e) != 0)
    337        1.1       cgd 			break;
    338       1.10  christos 		if (__bt_dleaf(t, key, h, (u_int)e->index) == RET_ERROR) {
    339        1.7       cgd 			mpool_put(t->bt_mp, h, 0);
    340        1.7       cgd 			return (RET_ERROR);
    341        1.1       cgd 		}
    342        1.7       cgd 		if (e->index == 0)
    343        1.7       cgd 			redo = 1;
    344        1.7       cgd 	}
    345        1.1       cgd 
    346        1.7       cgd 	/* Check for an empty page. */
    347        1.7       cgd 	if (NEXTINDEX(h) == 0) {
    348        1.7       cgd 		if (__bt_pdelete(t, h))
    349        1.7       cgd 			return (RET_ERROR);
    350        1.7       cgd 		goto loop;
    351        1.1       cgd 	}
    352        1.1       cgd 
    353        1.7       cgd 	/* Put the page. */
    354        1.7       cgd 	mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    355        1.7       cgd 
    356        1.7       cgd 	if (redo)
    357        1.7       cgd 		goto loop;
    358        1.7       cgd 	return (RET_SUCCESS);
    359        1.7       cgd }
    360        1.7       cgd 
    361        1.7       cgd /*
    362        1.7       cgd  * __bt_pdelete --
    363        1.7       cgd  *	Delete a single page from the tree.
    364        1.7       cgd  *
    365        1.7       cgd  * Parameters:
    366        1.7       cgd  *	t:	tree
    367        1.7       cgd  *	h:	leaf page
    368        1.7       cgd  *
    369        1.7       cgd  * Returns:
    370        1.7       cgd  *	RET_SUCCESS, RET_ERROR.
    371        1.7       cgd  *
    372        1.7       cgd  * Side-effects:
    373        1.7       cgd  *	mpool_put's the page
    374        1.7       cgd  */
    375        1.7       cgd static int
    376       1.13  christos __bt_pdelete(BTREE *t, PAGE *h)
    377        1.7       cgd {
    378        1.7       cgd 	BINTERNAL *bi;
    379        1.7       cgd 	PAGE *pg;
    380        1.7       cgd 	EPGNO *parent;
    381       1.10  christos 	indx_t cnt, idx, *ip, offset;
    382  1.13.12.1  wrstuden 	uint32_t nksize;
    383        1.7       cgd 	char *from;
    384        1.7       cgd 
    385        1.1       cgd 	/*
    386        1.7       cgd 	 * Walk the parent page stack -- a LIFO stack of the pages that were
    387        1.7       cgd 	 * traversed when we searched for the page where the delete occurred.
    388        1.7       cgd 	 * Each stack entry is a page number and a page index offset.  The
    389        1.7       cgd 	 * offset is for the page traversed on the search.  We've just deleted
    390        1.7       cgd 	 * a page, so we have to delete the key from the parent page.
    391        1.7       cgd 	 *
    392        1.7       cgd 	 * If the delete from the parent page makes it empty, this process may
    393        1.7       cgd 	 * continue all the way up the tree.  We stop if we reach the root page
    394        1.7       cgd 	 * (which is never deleted, it's just not worth the effort) or if the
    395        1.7       cgd 	 * delete does not empty the page.
    396        1.1       cgd 	 */
    397        1.7       cgd 	while ((parent = BT_POP(t)) != NULL) {
    398        1.7       cgd 		/* Get the parent page. */
    399        1.7       cgd 		if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
    400        1.7       cgd 			return (RET_ERROR);
    401        1.7       cgd 
    402       1.10  christos 		idx = parent->index;
    403       1.10  christos 		bi = GETBINTERNAL(pg, idx);
    404        1.7       cgd 
    405        1.7       cgd 		/* Free any overflow pages. */
    406        1.7       cgd 		if (bi->flags & P_BIGKEY &&
    407        1.7       cgd 		    __ovfl_delete(t, bi->bytes) == RET_ERROR) {
    408        1.7       cgd 			mpool_put(t->bt_mp, pg, 0);
    409        1.7       cgd 			return (RET_ERROR);
    410        1.7       cgd 		}
    411        1.1       cgd 
    412        1.7       cgd 		/*
    413        1.7       cgd 		 * Free the parent if it has only the one key and it's not the
    414        1.7       cgd 		 * root page. If it's the rootpage, turn it back into an empty
    415        1.7       cgd 		 * leaf page.
    416        1.7       cgd 		 */
    417       1.11  christos 		if (NEXTINDEX(pg) == 1) {
    418        1.7       cgd 			if (pg->pgno == P_ROOT) {
    419        1.7       cgd 				pg->lower = BTDATAOFF;
    420        1.7       cgd 				pg->upper = t->bt_psize;
    421        1.7       cgd 				pg->flags = P_BLEAF;
    422        1.1       cgd 			} else {
    423        1.7       cgd 				if (__bt_relink(t, pg) || __bt_free(t, pg))
    424        1.1       cgd 					return (RET_ERROR);
    425        1.7       cgd 				continue;
    426        1.1       cgd 			}
    427       1.11  christos 		} else {
    428        1.7       cgd 			/* Pack remaining key items at the end of the page. */
    429        1.7       cgd 			nksize = NBINTERNAL(bi->ksize);
    430       1.10  christos 			from = (char *)(void *)pg + pg->upper;
    431       1.10  christos 			memmove(from + nksize, from,
    432       1.10  christos 			(size_t)((char *)(void *)bi - from));
    433        1.7       cgd 			pg->upper += nksize;
    434        1.7       cgd 
    435        1.7       cgd 			/* Adjust indices' offsets, shift the indices down. */
    436       1.10  christos 			offset = pg->linp[idx];
    437       1.10  christos 			for (cnt = idx, ip = &pg->linp[0]; cnt--; ++ip)
    438        1.7       cgd 				if (ip[0] < offset)
    439        1.7       cgd 					ip[0] += nksize;
    440       1.10  christos 			for (cnt = NEXTINDEX(pg) - idx; --cnt; ++ip)
    441        1.7       cgd 				ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
    442        1.7       cgd 			pg->lower -= sizeof(indx_t);
    443        1.1       cgd 		}
    444        1.1       cgd 
    445        1.7       cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    446        1.7       cgd 		break;
    447        1.1       cgd 	}
    448        1.1       cgd 
    449        1.7       cgd 	/* Free the leaf page, as long as it wasn't the root. */
    450        1.7       cgd 	if (h->pgno == P_ROOT) {
    451        1.7       cgd 		mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    452        1.7       cgd 		return (RET_SUCCESS);
    453        1.7       cgd 	}
    454        1.7       cgd 	return (__bt_relink(t, h) || __bt_free(t, h));
    455        1.1       cgd }
    456        1.1       cgd 
    457        1.1       cgd /*
    458        1.7       cgd  * __bt_dleaf --
    459        1.7       cgd  *	Delete a single record from a leaf page.
    460        1.1       cgd  *
    461        1.1       cgd  * Parameters:
    462        1.1       cgd  *	t:	tree
    463        1.7       cgd  *    key:	referenced key
    464        1.7       cgd  *	h:	page
    465       1.10  christos  *	idx:	index on page to delete
    466        1.1       cgd  *
    467        1.1       cgd  * Returns:
    468        1.1       cgd  *	RET_SUCCESS, RET_ERROR.
    469        1.1       cgd  */
    470        1.1       cgd int
    471       1.13  christos __bt_dleaf(BTREE *t, const DBT *key, PAGE *h, u_int idx)
    472        1.1       cgd {
    473        1.7       cgd 	BLEAF *bl;
    474        1.7       cgd 	indx_t cnt, *ip, offset;
    475  1.13.12.1  wrstuden 	uint32_t nbytes;
    476        1.7       cgd 	void *to;
    477        1.1       cgd 	char *from;
    478        1.1       cgd 
    479        1.7       cgd 	/* If this record is referenced by the cursor, delete the cursor. */
    480        1.7       cgd 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    481        1.7       cgd 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    482       1.10  christos 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == idx &&
    483       1.10  christos 	    __bt_curdel(t, key, h, idx))
    484        1.7       cgd 		return (RET_ERROR);
    485        1.7       cgd 
    486        1.7       cgd 	/* If the entry uses overflow pages, make them available for reuse. */
    487       1.10  christos 	to = bl = GETBLEAF(h, idx);
    488        1.1       cgd 	if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
    489        1.1       cgd 		return (RET_ERROR);
    490        1.1       cgd 	if (bl->flags & P_BIGDATA &&
    491        1.1       cgd 	    __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
    492        1.1       cgd 		return (RET_ERROR);
    493        1.7       cgd 
    494        1.7       cgd 	/* Pack the remaining key/data items at the end of the page. */
    495        1.1       cgd 	nbytes = NBLEAF(bl);
    496       1.10  christos 	from = (char *)(void *)h + h->upper;
    497       1.10  christos 	memmove(from + nbytes, from, (size_t)((char *)(void *)to - from));
    498        1.1       cgd 	h->upper += nbytes;
    499        1.1       cgd 
    500        1.7       cgd 	/* Adjust the indices' offsets, shift the indices down. */
    501       1.10  christos 	offset = h->linp[idx];
    502       1.10  christos 	for (cnt = idx, ip = &h->linp[0]; cnt--; ++ip)
    503        1.1       cgd 		if (ip[0] < offset)
    504        1.1       cgd 			ip[0] += nbytes;
    505       1.10  christos 	for (cnt = NEXTINDEX(h) - idx; --cnt; ++ip)
    506        1.1       cgd 		ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
    507        1.1       cgd 	h->lower -= sizeof(indx_t);
    508        1.7       cgd 
    509        1.7       cgd 	/* If the cursor is on this page, adjust it as necessary. */
    510        1.7       cgd 	if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
    511        1.7       cgd 	    !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
    512       1.10  christos 	    t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > idx)
    513        1.7       cgd 		--t->bt_cursor.pg.index;
    514        1.7       cgd 
    515        1.1       cgd 	return (RET_SUCCESS);
    516        1.7       cgd }
    517        1.7       cgd 
    518        1.7       cgd /*
    519        1.7       cgd  * __bt_curdel --
    520        1.7       cgd  *	Delete the cursor.
    521        1.7       cgd  *
    522        1.7       cgd  * Parameters:
    523        1.7       cgd  *	t:	tree
    524        1.7       cgd  *    key:	referenced key (or NULL)
    525        1.7       cgd  *	h:	page
    526       1.10  christos  *  idx:	index on page to delete
    527        1.7       cgd  *
    528        1.7       cgd  * Returns:
    529        1.7       cgd  *	RET_SUCCESS, RET_ERROR.
    530        1.7       cgd  */
    531        1.7       cgd static int
    532       1.13  christos __bt_curdel(BTREE *t, const DBT *key, PAGE *h, u_int idx)
    533        1.7       cgd {
    534        1.7       cgd 	CURSOR *c;
    535        1.7       cgd 	EPG e;
    536        1.7       cgd 	PAGE *pg;
    537        1.7       cgd 	int curcopy, status;
    538        1.7       cgd 
    539        1.7       cgd 	/*
    540        1.7       cgd 	 * If there are duplicates, move forward or backward to one.
    541        1.7       cgd 	 * Otherwise, copy the key into the cursor area.
    542        1.7       cgd 	 */
    543        1.7       cgd 	c = &t->bt_cursor;
    544        1.7       cgd 	F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
    545        1.7       cgd 
    546        1.7       cgd 	curcopy = 0;
    547        1.7       cgd 	if (!F_ISSET(t, B_NODUPS)) {
    548        1.7       cgd 		/*
    549        1.7       cgd 		 * We're going to have to do comparisons.  If we weren't
    550        1.7       cgd 		 * provided a copy of the key, i.e. the user is deleting
    551        1.7       cgd 		 * the current cursor position, get one.
    552        1.7       cgd 		 */
    553        1.7       cgd 		if (key == NULL) {
    554        1.7       cgd 			e.page = h;
    555       1.10  christos 			e.index = idx;
    556        1.7       cgd 			if ((status = __bt_ret(t, &e,
    557        1.7       cgd 			    &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
    558        1.7       cgd 				return (status);
    559        1.7       cgd 			curcopy = 1;
    560        1.7       cgd 			key = &c->key;
    561        1.7       cgd 		}
    562        1.7       cgd 		/* Check previous key, if not at the beginning of the page. */
    563       1.10  christos 		if (idx > 0) {
    564        1.7       cgd 			e.page = h;
    565       1.10  christos 			e.index = idx - 1;
    566        1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    567        1.7       cgd 				F_SET(c, CURS_BEFORE);
    568        1.7       cgd 				goto dup2;
    569        1.7       cgd 			}
    570        1.7       cgd 		}
    571        1.7       cgd 		/* Check next key, if not at the end of the page. */
    572       1.10  christos 		if (idx < NEXTINDEX(h) - 1) {
    573        1.7       cgd 			e.page = h;
    574       1.10  christos 			e.index = idx + 1;
    575        1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    576        1.7       cgd 				F_SET(c, CURS_AFTER);
    577        1.7       cgd 				goto dup2;
    578        1.7       cgd 			}
    579        1.7       cgd 		}
    580        1.7       cgd 		/* Check previous key if at the beginning of the page. */
    581       1.10  christos 		if (idx == 0 && h->prevpg != P_INVALID) {
    582        1.7       cgd 			if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    583        1.7       cgd 				return (RET_ERROR);
    584        1.7       cgd 			e.page = pg;
    585        1.7       cgd 			e.index = NEXTINDEX(pg) - 1;
    586        1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    587        1.7       cgd 				F_SET(c, CURS_BEFORE);
    588        1.7       cgd 				goto dup1;
    589        1.7       cgd 			}
    590        1.7       cgd 			mpool_put(t->bt_mp, pg, 0);
    591        1.7       cgd 		}
    592        1.7       cgd 		/* Check next key if at the end of the page. */
    593       1.10  christos 		if (idx == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
    594        1.7       cgd 			if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    595        1.7       cgd 				return (RET_ERROR);
    596        1.7       cgd 			e.page = pg;
    597        1.7       cgd 			e.index = 0;
    598        1.7       cgd 			if (__bt_cmp(t, key, &e) == 0) {
    599        1.7       cgd 				F_SET(c, CURS_AFTER);
    600        1.7       cgd dup1:				mpool_put(t->bt_mp, pg, 0);
    601        1.7       cgd dup2:				c->pg.pgno = e.page->pgno;
    602        1.7       cgd 				c->pg.index = e.index;
    603        1.7       cgd 				return (RET_SUCCESS);
    604        1.7       cgd 			}
    605        1.7       cgd 			mpool_put(t->bt_mp, pg, 0);
    606        1.7       cgd 		}
    607        1.7       cgd 	}
    608        1.7       cgd 	e.page = h;
    609       1.10  christos 	e.index = idx;
    610        1.7       cgd 	if (curcopy || (status =
    611        1.7       cgd 	    __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
    612        1.7       cgd 		F_SET(c, CURS_ACQUIRE);
    613        1.7       cgd 		return (RET_SUCCESS);
    614        1.7       cgd 	}
    615        1.7       cgd 	return (status);
    616        1.7       cgd }
    617        1.7       cgd 
    618        1.7       cgd /*
    619        1.7       cgd  * __bt_relink --
    620        1.7       cgd  *	Link around a deleted page.
    621        1.7       cgd  *
    622        1.7       cgd  * Parameters:
    623        1.7       cgd  *	t:	tree
    624        1.7       cgd  *	h:	page to be deleted
    625        1.7       cgd  */
    626        1.7       cgd static int
    627       1.13  christos __bt_relink(BTREE *t, PAGE *h)
    628        1.7       cgd {
    629        1.7       cgd 	PAGE *pg;
    630        1.7       cgd 
    631        1.7       cgd 	if (h->nextpg != P_INVALID) {
    632        1.7       cgd 		if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
    633        1.7       cgd 			return (RET_ERROR);
    634        1.7       cgd 		pg->prevpg = h->prevpg;
    635        1.7       cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    636        1.7       cgd 	}
    637        1.7       cgd 	if (h->prevpg != P_INVALID) {
    638        1.7       cgd 		if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
    639        1.7       cgd 			return (RET_ERROR);
    640        1.7       cgd 		pg->nextpg = h->nextpg;
    641        1.7       cgd 		mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
    642        1.7       cgd 	}
    643        1.7       cgd 	return (0);
    644        1.1       cgd }
    645