Home | History | Annotate | Line # | Download | only in btree
bt_delete.c revision 1.1.1.1
      1      1.1  cgd /*-
      2  1.1.1.1  cgd  * Copyright (c) 1990, 1993, 1994
      3      1.1  cgd  *	The Regents of the University of California.  All rights reserved.
      4      1.1  cgd  *
      5      1.1  cgd  * This code is derived from software contributed to Berkeley by
      6      1.1  cgd  * Mike Olson.
      7      1.1  cgd  *
      8      1.1  cgd  * Redistribution and use in source and binary forms, with or without
      9      1.1  cgd  * modification, are permitted provided that the following conditions
     10      1.1  cgd  * are met:
     11      1.1  cgd  * 1. Redistributions of source code must retain the above copyright
     12      1.1  cgd  *    notice, this list of conditions and the following disclaimer.
     13      1.1  cgd  * 2. Redistributions in binary form must reproduce the above copyright
     14      1.1  cgd  *    notice, this list of conditions and the following disclaimer in the
     15      1.1  cgd  *    documentation and/or other materials provided with the distribution.
     16      1.1  cgd  * 3. All advertising materials mentioning features or use of this software
     17      1.1  cgd  *    must display the following acknowledgement:
     18      1.1  cgd  *	This product includes software developed by the University of
     19      1.1  cgd  *	California, Berkeley and its contributors.
     20      1.1  cgd  * 4. Neither the name of the University nor the names of its contributors
     21      1.1  cgd  *    may be used to endorse or promote products derived from this software
     22      1.1  cgd  *    without specific prior written permission.
     23      1.1  cgd  *
     24      1.1  cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25      1.1  cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26      1.1  cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27      1.1  cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28      1.1  cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29      1.1  cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30      1.1  cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31      1.1  cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32      1.1  cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33      1.1  cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34      1.1  cgd  * SUCH DAMAGE.
     35      1.1  cgd  */
     36      1.1  cgd 
     37      1.1  cgd #if defined(LIBC_SCCS) && !defined(lint)
     38  1.1.1.1  cgd static char sccsid[] = "@(#)bt_delete.c	8.4 (Berkeley) 5/31/94";
     39      1.1  cgd #endif /* LIBC_SCCS and not lint */
     40      1.1  cgd 
     41      1.1  cgd #include <sys/types.h>
     42      1.1  cgd 
     43      1.1  cgd #include <errno.h>
     44      1.1  cgd #include <stdio.h>
     45      1.1  cgd #include <string.h>
     46      1.1  cgd 
     47      1.1  cgd #include <db.h>
     48      1.1  cgd #include "btree.h"
     49      1.1  cgd 
     50      1.1  cgd static int bt_bdelete __P((BTREE *, const DBT *));
     51      1.1  cgd 
     52      1.1  cgd /*
     53      1.1  cgd  * __BT_DELETE -- Delete the item(s) referenced by a key.
     54      1.1  cgd  *
     55      1.1  cgd  * Parameters:
     56      1.1  cgd  *	dbp:	pointer to access method
     57      1.1  cgd  *	key:	key to delete
     58      1.1  cgd  *	flags:	R_CURSOR if deleting what the cursor references
     59      1.1  cgd  *
     60      1.1  cgd  * Returns:
     61      1.1  cgd  *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
     62      1.1  cgd  */
     63      1.1  cgd int
     64      1.1  cgd __bt_delete(dbp, key, flags)
     65      1.1  cgd 	const DB *dbp;
     66      1.1  cgd 	const DBT *key;
     67      1.1  cgd 	u_int flags;
     68      1.1  cgd {
     69      1.1  cgd 	BTREE *t;
     70      1.1  cgd 	int status;
     71      1.1  cgd 
     72      1.1  cgd 	t = dbp->internal;
     73  1.1.1.1  cgd 
     74  1.1.1.1  cgd 	/* Toss any page pinned across calls. */
     75  1.1.1.1  cgd 	if (t->bt_pinned != NULL) {
     76  1.1.1.1  cgd 		mpool_put(t->bt_mp, t->bt_pinned, 0);
     77  1.1.1.1  cgd 		t->bt_pinned = NULL;
     78  1.1.1.1  cgd 	}
     79  1.1.1.1  cgd 
     80      1.1  cgd 	if (ISSET(t, B_RDONLY)) {
     81      1.1  cgd 		errno = EPERM;
     82      1.1  cgd 		return (RET_ERROR);
     83      1.1  cgd 	}
     84  1.1.1.1  cgd 
     85      1.1  cgd 	switch(flags) {
     86      1.1  cgd 	case 0:
     87      1.1  cgd 		status = bt_bdelete(t, key);
     88      1.1  cgd 		break;
     89      1.1  cgd 	case R_CURSOR:
     90      1.1  cgd 		/*
     91      1.1  cgd 		 * If flags is R_CURSOR, delete the cursor; must already have
     92      1.1  cgd 		 * started a scan and not have already deleted the record.  For
     93      1.1  cgd 		 * the delete cursor bit to have been set requires that the
     94      1.1  cgd 		 * scan be initialized, so no reason to check.
     95      1.1  cgd 		 */
     96      1.1  cgd 		if (!ISSET(t, B_SEQINIT))
     97      1.1  cgd                         goto einval;
     98      1.1  cgd 		status = ISSET(t, B_DELCRSR) ?
     99      1.1  cgd 		    RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor);
    100      1.1  cgd 		break;
    101      1.1  cgd 	default:
    102      1.1  cgd einval:		errno = EINVAL;
    103      1.1  cgd 		return (RET_ERROR);
    104      1.1  cgd 	}
    105      1.1  cgd 	if (status == RET_SUCCESS)
    106      1.1  cgd 		SET(t, B_MODIFIED);
    107      1.1  cgd 	return (status);
    108      1.1  cgd }
    109      1.1  cgd 
    110      1.1  cgd /*
    111      1.1  cgd  * BT_BDELETE -- Delete all key/data pairs matching the specified key.
    112      1.1  cgd  *
    113      1.1  cgd  * Parameters:
    114      1.1  cgd  *	tree:	tree
    115      1.1  cgd  *	key:	key to delete
    116      1.1  cgd  *
    117      1.1  cgd  * Returns:
    118      1.1  cgd  *	RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
    119      1.1  cgd  */
    120      1.1  cgd static int
    121      1.1  cgd bt_bdelete(t, key)
    122      1.1  cgd 	BTREE *t;
    123      1.1  cgd 	const DBT *key;
    124      1.1  cgd {
    125      1.1  cgd 	EPG *e, save;
    126      1.1  cgd 	PAGE *h;
    127      1.1  cgd 	pgno_t cpgno, pg;
    128      1.1  cgd 	indx_t cindex;
    129      1.1  cgd 	int deleted, dirty1, dirty2, exact;
    130      1.1  cgd 
    131      1.1  cgd 	/* Find any matching record; __bt_search pins the page. */
    132      1.1  cgd 	if ((e = __bt_search(t, key, &exact)) == NULL)
    133      1.1  cgd 		return (RET_ERROR);
    134      1.1  cgd 	if (!exact) {
    135      1.1  cgd 		mpool_put(t->bt_mp, e->page, 0);
    136      1.1  cgd 		return (RET_SPECIAL);
    137      1.1  cgd 	}
    138      1.1  cgd 
    139      1.1  cgd 	/*
    140      1.1  cgd 	 * Delete forward, then delete backward, from the found key.  The
    141      1.1  cgd 	 * ordering is so that the deletions don't mess up the page refs.
    142      1.1  cgd 	 * The first loop deletes the key from the original page, the second
    143      1.1  cgd 	 * unpins the original page.  In the first loop, dirty1 is set if
    144      1.1  cgd 	 * the original page is modified, and dirty2 is set if any subsequent
    145      1.1  cgd 	 * pages are modified.  In the second loop, dirty1 starts off set if
    146      1.1  cgd 	 * the original page has been modified, and is set if any subsequent
    147      1.1  cgd 	 * pages are modified.
    148      1.1  cgd 	 *
    149      1.1  cgd 	 * If find the key referenced by the cursor, don't delete it, just
    150      1.1  cgd 	 * flag it for future deletion.  The cursor page number is P_INVALID
    151      1.1  cgd 	 * unless the sequential scan is initialized, so no reason to check.
    152      1.1  cgd 	 * A special case is when the already deleted cursor record was the
    153      1.1  cgd 	 * only record found.  If so, then the delete opertion fails as no
    154      1.1  cgd 	 * records were deleted.
    155      1.1  cgd 	 *
    156      1.1  cgd 	 * Cycle in place in the current page until the current record doesn't
    157      1.1  cgd 	 * match the key or the page is empty.  If the latter, walk forward,
    158      1.1  cgd 	 * skipping empty pages and repeating until a record doesn't match
    159      1.1  cgd 	 * the key or the end of the tree is reached.
    160      1.1  cgd 	 */
    161      1.1  cgd 	cpgno = t->bt_bcursor.pgno;
    162      1.1  cgd 	cindex = t->bt_bcursor.index;
    163      1.1  cgd 	save = *e;
    164      1.1  cgd 	dirty1 = 0;
    165      1.1  cgd 	for (h = e->page, deleted = 0;;) {
    166      1.1  cgd 		dirty2 = 0;
    167      1.1  cgd 		do {
    168      1.1  cgd 			if (h->pgno == cpgno && e->index == cindex) {
    169      1.1  cgd 				if (!ISSET(t, B_DELCRSR)) {
    170      1.1  cgd 					SET(t, B_DELCRSR);
    171      1.1  cgd 					deleted = 1;
    172      1.1  cgd 				}
    173      1.1  cgd 				++e->index;
    174      1.1  cgd 			} else {
    175      1.1  cgd 				if (__bt_dleaf(t, h, e->index)) {
    176      1.1  cgd 					if (h->pgno != save.page->pgno)
    177      1.1  cgd 						mpool_put(t->bt_mp, h, dirty2);
    178      1.1  cgd 					mpool_put(t->bt_mp, save.page, dirty1);
    179      1.1  cgd 					return (RET_ERROR);
    180      1.1  cgd 				}
    181      1.1  cgd 				if (h->pgno == save.page->pgno)
    182      1.1  cgd 					dirty1 = MPOOL_DIRTY;
    183      1.1  cgd 				else
    184      1.1  cgd 					dirty2 = MPOOL_DIRTY;
    185      1.1  cgd 				deleted = 1;
    186      1.1  cgd 			}
    187      1.1  cgd 		} while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
    188      1.1  cgd 
    189      1.1  cgd 		/*
    190      1.1  cgd 		 * Quit if didn't find a match, no next page, or first key on
    191      1.1  cgd 		 * the next page doesn't match.  Don't unpin the original page
    192      1.1  cgd 		 * unless an error occurs.
    193      1.1  cgd 		 */
    194      1.1  cgd 		if (e->index < NEXTINDEX(h))
    195      1.1  cgd 			break;
    196      1.1  cgd 		for (;;) {
    197      1.1  cgd 			if ((pg = h->nextpg) == P_INVALID)
    198      1.1  cgd 				goto done1;
    199      1.1  cgd 			if (h->pgno != save.page->pgno)
    200      1.1  cgd 				mpool_put(t->bt_mp, h, dirty2);
    201      1.1  cgd 			if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) {
    202      1.1  cgd 				mpool_put(t->bt_mp, save.page, dirty1);
    203      1.1  cgd 				return (RET_ERROR);
    204      1.1  cgd 			}
    205      1.1  cgd 			if (NEXTINDEX(h) != 0) {
    206      1.1  cgd 				e->page = h;
    207      1.1  cgd 				e->index = 0;
    208      1.1  cgd 				break;
    209      1.1  cgd 			}
    210      1.1  cgd 		}
    211      1.1  cgd 
    212      1.1  cgd 		if (__bt_cmp(t, key, e) != 0)
    213      1.1  cgd 			break;
    214      1.1  cgd 	}
    215      1.1  cgd 
    216      1.1  cgd 	/*
    217      1.1  cgd 	 * Reach here with the original page and the last page referenced
    218      1.1  cgd 	 * pinned (they may be the same).  Release it if not the original.
    219      1.1  cgd 	 */
    220      1.1  cgd done1:	if (h->pgno != save.page->pgno)
    221      1.1  cgd 		mpool_put(t->bt_mp, h, dirty2);
    222      1.1  cgd 
    223      1.1  cgd 	/*
    224      1.1  cgd 	 * Walk backwards from the record previous to the record returned by
    225      1.1  cgd 	 * __bt_search, skipping empty pages, until a record doesn't match
    226      1.1  cgd 	 * the key or reach the beginning of the tree.
    227      1.1  cgd 	 */
    228      1.1  cgd 	*e = save;
    229      1.1  cgd 	for (;;) {
    230      1.1  cgd 		if (e->index)
    231      1.1  cgd 			--e->index;
    232      1.1  cgd 		for (h = e->page; e->index; --e->index) {
    233      1.1  cgd 			if (__bt_cmp(t, key, e) != 0)
    234      1.1  cgd 				goto done2;
    235      1.1  cgd 			if (h->pgno == cpgno && e->index == cindex) {
    236      1.1  cgd 				if (!ISSET(t, B_DELCRSR)) {
    237      1.1  cgd 					SET(t, B_DELCRSR);
    238      1.1  cgd 					deleted = 1;
    239      1.1  cgd 				}
    240      1.1  cgd 			} else {
    241      1.1  cgd 				if (__bt_dleaf(t, h, e->index) == RET_ERROR) {
    242      1.1  cgd 					mpool_put(t->bt_mp, h, dirty1);
    243      1.1  cgd 					return (RET_ERROR);
    244      1.1  cgd 				}
    245      1.1  cgd 				if (h->pgno == save.page->pgno)
    246      1.1  cgd 					dirty1 = MPOOL_DIRTY;
    247      1.1  cgd 				deleted = 1;
    248      1.1  cgd 			}
    249      1.1  cgd 		}
    250      1.1  cgd 
    251      1.1  cgd 		if ((pg = h->prevpg) == P_INVALID)
    252      1.1  cgd 			goto done2;
    253      1.1  cgd 		mpool_put(t->bt_mp, h, dirty1);
    254      1.1  cgd 		dirty1 = 0;
    255      1.1  cgd 		if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL)
    256      1.1  cgd 			return (RET_ERROR);
    257      1.1  cgd 		e->index = NEXTINDEX(e->page);
    258      1.1  cgd 	}
    259      1.1  cgd 
    260      1.1  cgd 	/*
    261      1.1  cgd 	 * Reach here with the last page that was looked at pinned.  Release
    262      1.1  cgd 	 * it.
    263      1.1  cgd 	 */
    264      1.1  cgd done2:	mpool_put(t->bt_mp, h, dirty1);
    265      1.1  cgd 	return (deleted ? RET_SUCCESS : RET_SPECIAL);
    266      1.1  cgd }
    267      1.1  cgd 
    268      1.1  cgd /*
    269      1.1  cgd  * __BT_DLEAF -- Delete a single record from a leaf page.
    270      1.1  cgd  *
    271      1.1  cgd  * Parameters:
    272      1.1  cgd  *	t:	tree
    273      1.1  cgd  *	index:	index on current page to delete
    274      1.1  cgd  *
    275      1.1  cgd  * Returns:
    276      1.1  cgd  *	RET_SUCCESS, RET_ERROR.
    277      1.1  cgd  */
    278      1.1  cgd int
    279      1.1  cgd __bt_dleaf(t, h, index)
    280      1.1  cgd 	BTREE *t;
    281      1.1  cgd 	PAGE *h;
    282  1.1.1.1  cgd 	indx_t index;
    283      1.1  cgd {
    284      1.1  cgd 	register BLEAF *bl;
    285  1.1.1.1  cgd 	register indx_t cnt, *ip, offset;
    286  1.1.1.1  cgd 	register u_int32_t nbytes;
    287      1.1  cgd 	char *from;
    288      1.1  cgd 	void *to;
    289      1.1  cgd 
    290      1.1  cgd 	/*
    291      1.1  cgd 	 * Delete a record from a btree leaf page.  Internal records are never
    292      1.1  cgd 	 * deleted from internal pages, regardless of the records that caused
    293      1.1  cgd 	 * them to be added being deleted.  Pages made empty by deletion are
    294      1.1  cgd 	 * not reclaimed.  They are, however, made available for reuse.
    295      1.1  cgd 	 *
    296      1.1  cgd 	 * Pack the remaining entries at the end of the page, shift the indices
    297      1.1  cgd 	 * down, overwriting the deleted record and its index.  If the record
    298      1.1  cgd 	 * uses overflow pages, make them available for reuse.
    299      1.1  cgd 	 */
    300      1.1  cgd 	to = bl = GETBLEAF(h, index);
    301      1.1  cgd 	if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
    302      1.1  cgd 		return (RET_ERROR);
    303      1.1  cgd 	if (bl->flags & P_BIGDATA &&
    304      1.1  cgd 	    __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
    305      1.1  cgd 		return (RET_ERROR);
    306      1.1  cgd 	nbytes = NBLEAF(bl);
    307      1.1  cgd 
    308      1.1  cgd 	/*
    309      1.1  cgd 	 * Compress the key/data pairs.  Compress and adjust the [BR]LEAF
    310      1.1  cgd 	 * offsets.  Reset the headers.
    311      1.1  cgd 	 */
    312      1.1  cgd 	from = (char *)h + h->upper;
    313      1.1  cgd 	memmove(from + nbytes, from, (char *)to - from);
    314      1.1  cgd 	h->upper += nbytes;
    315      1.1  cgd 
    316      1.1  cgd 	offset = h->linp[index];
    317      1.1  cgd 	for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
    318      1.1  cgd 		if (ip[0] < offset)
    319      1.1  cgd 			ip[0] += nbytes;
    320      1.1  cgd 	for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
    321      1.1  cgd 		ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
    322      1.1  cgd 	h->lower -= sizeof(indx_t);
    323      1.1  cgd 	return (RET_SUCCESS);
    324      1.1  cgd }
    325