bt_delete.c revision 1.7 1 /* $NetBSD: bt_delete.c,v 1.7 1996/05/03 21:50:44 cgd Exp $ */
2
3 /*-
4 * Copyright (c) 1990, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Mike Olson.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #if defined(LIBC_SCCS) && !defined(lint)
40 #if 0
41 static char sccsid[] = "@(#)bt_delete.c 8.13 (Berkeley) 7/28/94";
42 #else
43 static char rcsid[] = "$NetBSD: bt_delete.c,v 1.7 1996/05/03 21:50:44 cgd Exp $";
44 #endif
45 #endif /* LIBC_SCCS and not lint */
46
47 #include <sys/types.h>
48
49 #include <errno.h>
50 #include <stdio.h>
51 #include <string.h>
52
53 #include <db.h>
54 #include "btree.h"
55
56 static int __bt_bdelete __P((BTREE *, const DBT *));
57 static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int));
58 static int __bt_pdelete __P((BTREE *, PAGE *));
59 static int __bt_relink __P((BTREE *, PAGE *));
60 static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *));
61
62 /*
63 * __bt_delete
64 * Delete the item(s) referenced by a key.
65 *
66 * Return RET_SPECIAL if the key is not found.
67 */
68 int
69 __bt_delete(dbp, key, flags)
70 const DB *dbp;
71 const DBT *key;
72 u_int flags;
73 {
74 BTREE *t;
75 CURSOR *c;
76 PAGE *h;
77 int status;
78
79 t = dbp->internal;
80
81 /* Toss any page pinned across calls. */
82 if (t->bt_pinned != NULL) {
83 mpool_put(t->bt_mp, t->bt_pinned, 0);
84 t->bt_pinned = NULL;
85 }
86
87 /* Check for change to a read-only tree. */
88 if (F_ISSET(t, B_RDONLY)) {
89 errno = EPERM;
90 return (RET_ERROR);
91 }
92
93 switch (flags) {
94 case 0:
95 status = __bt_bdelete(t, key);
96 break;
97 case R_CURSOR:
98 /*
99 * If flags is R_CURSOR, delete the cursor. Must already
100 * have started a scan and not have already deleted it.
101 */
102 c = &t->bt_cursor;
103 if (F_ISSET(c, CURS_INIT)) {
104 if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
105 return (RET_SPECIAL);
106 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
107 return (RET_ERROR);
108
109 /*
110 * If the page is about to be emptied, we'll need to
111 * delete it, which means we have to acquire a stack.
112 */
113 if (NEXTINDEX(h) == 1)
114 if (__bt_stkacq(t, &h, &t->bt_cursor))
115 return (RET_ERROR);
116
117 status = __bt_dleaf(t, NULL, h, c->pg.index);
118
119 if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
120 if (__bt_pdelete(t, h))
121 return (RET_ERROR);
122 } else
123 mpool_put(t->bt_mp,
124 h, status == RET_SUCCESS ? MPOOL_DIRTY : 0);
125 break;
126 }
127 /* FALLTHROUGH */
128 default:
129 errno = EINVAL;
130 return (RET_ERROR);
131 }
132 if (status == RET_SUCCESS)
133 F_SET(t, B_MODIFIED);
134 return (status);
135 }
136
137 /*
138 * __bt_stkacq --
139 * Acquire a stack so we can delete a cursor entry.
140 *
141 * Parameters:
142 * t: tree
143 * hp: pointer to current, pinned PAGE pointer
144 * c: pointer to the cursor
145 *
146 * Returns:
147 * 0 on success, 1 on failure
148 */
149 static int
150 __bt_stkacq(t, hp, c)
151 BTREE *t;
152 PAGE **hp;
153 CURSOR *c;
154 {
155 BINTERNAL *bi;
156 EPG *e;
157 EPGNO *parent;
158 PAGE *h;
159 indx_t index;
160 pgno_t pgno;
161 recno_t nextpg, prevpg;
162 int exact, level;
163
164 /*
165 * Find the first occurrence of the key in the tree. Toss the
166 * currently locked page so we don't hit an already-locked page.
167 */
168 h = *hp;
169 mpool_put(t->bt_mp, h, 0);
170 if ((e = __bt_search(t, &c->key, &exact)) == NULL)
171 return (1);
172 h = e->page;
173
174 /* See if we got it in one shot. */
175 if (h->pgno == c->pg.pgno)
176 goto ret;
177
178 /*
179 * Move right, looking for the page. At each move we have to move
180 * up the stack until we don't have to move to the next page. If
181 * we have to change pages at an internal level, we have to fix the
182 * stack back up.
183 */
184 while (h->pgno != c->pg.pgno) {
185 if ((nextpg = h->nextpg) == P_INVALID)
186 break;
187 mpool_put(t->bt_mp, h, 0);
188
189 /* Move up the stack. */
190 for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
191 /* Get the parent page. */
192 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
193 return (1);
194
195 /* Move to the next index. */
196 if (parent->index != NEXTINDEX(h) - 1) {
197 index = parent->index + 1;
198 BT_PUSH(t, h->pgno, index);
199 break;
200 }
201 mpool_put(t->bt_mp, h, 0);
202 }
203
204 /* Restore the stack. */
205 while (level--) {
206 /* Push the next level down onto the stack. */
207 bi = GETBINTERNAL(h, index);
208 pgno = bi->pgno;
209 BT_PUSH(t, pgno, 0);
210
211 /* Lose the currently pinned page. */
212 mpool_put(t->bt_mp, h, 0);
213
214 /* Get the next level down. */
215 if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
216 return (1);
217 index = 0;
218 }
219 mpool_put(t->bt_mp, h, 0);
220 if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
221 return (1);
222 }
223
224 if (h->pgno == c->pg.pgno)
225 goto ret;
226
227 /* Reacquire the original stack. */
228 mpool_put(t->bt_mp, h, 0);
229 if ((e = __bt_search(t, &c->key, &exact)) == NULL)
230 return (1);
231 h = e->page;
232
233 /*
234 * Move left, looking for the page. At each move we have to move
235 * up the stack until we don't have to change pages to move to the
236 * next page. If we have to change pages at an internal level, we
237 * have to fix the stack back up.
238 */
239 while (h->pgno != c->pg.pgno) {
240 if ((prevpg = h->prevpg) == P_INVALID)
241 break;
242 mpool_put(t->bt_mp, h, 0);
243
244 /* Move up the stack. */
245 for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
246 /* Get the parent page. */
247 if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
248 return (1);
249
250 /* Move to the next index. */
251 if (parent->index != 0) {
252 index = parent->index - 1;
253 BT_PUSH(t, h->pgno, index);
254 break;
255 }
256 mpool_put(t->bt_mp, h, 0);
257 }
258
259 /* Restore the stack. */
260 while (level--) {
261 /* Push the next level down onto the stack. */
262 bi = GETBINTERNAL(h, index);
263 pgno = bi->pgno;
264
265 /* Lose the currently pinned page. */
266 mpool_put(t->bt_mp, h, 0);
267
268 /* Get the next level down. */
269 if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
270 return (1);
271
272 index = NEXTINDEX(h) - 1;
273 BT_PUSH(t, pgno, index);
274 }
275 mpool_put(t->bt_mp, h, 0);
276 if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
277 return (1);
278 }
279
280
281 ret: mpool_put(t->bt_mp, h, 0);
282 return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
283 }
284
285 /*
286 * __bt_bdelete --
287 * Delete all key/data pairs matching the specified key.
288 *
289 * Parameters:
290 * t: tree
291 * key: key to delete
292 *
293 * Returns:
294 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
295 */
296 static int
297 __bt_bdelete(t, key)
298 BTREE *t;
299 const DBT *key;
300 {
301 EPG *e;
302 PAGE *h;
303 int deleted, exact, redo;
304
305 deleted = 0;
306
307 /* Find any matching record; __bt_search pins the page. */
308 loop: if ((e = __bt_search(t, key, &exact)) == NULL)
309 return (deleted ? RET_SUCCESS : RET_ERROR);
310 if (!exact) {
311 mpool_put(t->bt_mp, e->page, 0);
312 return (deleted ? RET_SUCCESS : RET_SPECIAL);
313 }
314
315 /*
316 * Delete forward, then delete backward, from the found key. If
317 * there are duplicates and we reach either side of the page, do
318 * the key search again, so that we get them all.
319 */
320 redo = 0;
321 h = e->page;
322 do {
323 if (__bt_dleaf(t, key, h, e->index)) {
324 mpool_put(t->bt_mp, h, 0);
325 return (RET_ERROR);
326 }
327 if (F_ISSET(t, B_NODUPS)) {
328 if (NEXTINDEX(h) == 0) {
329 if (__bt_pdelete(t, h))
330 return (RET_ERROR);
331 } else
332 mpool_put(t->bt_mp, h, MPOOL_DIRTY);
333 return (RET_SUCCESS);
334 }
335 deleted = 1;
336 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
337
338 /* Check for right-hand edge of the page. */
339 if (e->index == NEXTINDEX(h))
340 redo = 1;
341
342 /* Delete from the key to the beginning of the page. */
343 while (e->index-- > 0) {
344 if (__bt_cmp(t, key, e) != 0)
345 break;
346 if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) {
347 mpool_put(t->bt_mp, h, 0);
348 return (RET_ERROR);
349 }
350 if (e->index == 0)
351 redo = 1;
352 }
353
354 /* Check for an empty page. */
355 if (NEXTINDEX(h) == 0) {
356 if (__bt_pdelete(t, h))
357 return (RET_ERROR);
358 goto loop;
359 }
360
361 /* Put the page. */
362 mpool_put(t->bt_mp, h, MPOOL_DIRTY);
363
364 if (redo)
365 goto loop;
366 return (RET_SUCCESS);
367 }
368
369 /*
370 * __bt_pdelete --
371 * Delete a single page from the tree.
372 *
373 * Parameters:
374 * t: tree
375 * h: leaf page
376 *
377 * Returns:
378 * RET_SUCCESS, RET_ERROR.
379 *
380 * Side-effects:
381 * mpool_put's the page
382 */
383 static int
384 __bt_pdelete(t, h)
385 BTREE *t;
386 PAGE *h;
387 {
388 BINTERNAL *bi;
389 PAGE *pg;
390 EPGNO *parent;
391 indx_t cnt, index, *ip, offset;
392 u_int32_t nksize;
393 char *from;
394
395 /*
396 * Walk the parent page stack -- a LIFO stack of the pages that were
397 * traversed when we searched for the page where the delete occurred.
398 * Each stack entry is a page number and a page index offset. The
399 * offset is for the page traversed on the search. We've just deleted
400 * a page, so we have to delete the key from the parent page.
401 *
402 * If the delete from the parent page makes it empty, this process may
403 * continue all the way up the tree. We stop if we reach the root page
404 * (which is never deleted, it's just not worth the effort) or if the
405 * delete does not empty the page.
406 */
407 while ((parent = BT_POP(t)) != NULL) {
408 /* Get the parent page. */
409 if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
410 return (RET_ERROR);
411
412 index = parent->index;
413 bi = GETBINTERNAL(pg, index);
414
415 /* Free any overflow pages. */
416 if (bi->flags & P_BIGKEY &&
417 __ovfl_delete(t, bi->bytes) == RET_ERROR) {
418 mpool_put(t->bt_mp, pg, 0);
419 return (RET_ERROR);
420 }
421
422 /*
423 * Free the parent if it has only the one key and it's not the
424 * root page. If it's the rootpage, turn it back into an empty
425 * leaf page.
426 */
427 if (NEXTINDEX(pg) == 1)
428 if (pg->pgno == P_ROOT) {
429 pg->lower = BTDATAOFF;
430 pg->upper = t->bt_psize;
431 pg->flags = P_BLEAF;
432 } else {
433 if (__bt_relink(t, pg) || __bt_free(t, pg))
434 return (RET_ERROR);
435 continue;
436 }
437 else {
438 /* Pack remaining key items at the end of the page. */
439 nksize = NBINTERNAL(bi->ksize);
440 from = (char *)pg + pg->upper;
441 memmove(from + nksize, from, (char *)bi - from);
442 pg->upper += nksize;
443
444 /* Adjust indices' offsets, shift the indices down. */
445 offset = pg->linp[index];
446 for (cnt = index, ip = &pg->linp[0]; cnt--; ++ip)
447 if (ip[0] < offset)
448 ip[0] += nksize;
449 for (cnt = NEXTINDEX(pg) - index; --cnt; ++ip)
450 ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
451 pg->lower -= sizeof(indx_t);
452 }
453
454 mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
455 break;
456 }
457
458 /* Free the leaf page, as long as it wasn't the root. */
459 if (h->pgno == P_ROOT) {
460 mpool_put(t->bt_mp, h, MPOOL_DIRTY);
461 return (RET_SUCCESS);
462 }
463 return (__bt_relink(t, h) || __bt_free(t, h));
464 }
465
466 /*
467 * __bt_dleaf --
468 * Delete a single record from a leaf page.
469 *
470 * Parameters:
471 * t: tree
472 * key: referenced key
473 * h: page
474 * index: index on page to delete
475 *
476 * Returns:
477 * RET_SUCCESS, RET_ERROR.
478 */
479 int
480 __bt_dleaf(t, key, h, index)
481 BTREE *t;
482 const DBT *key;
483 PAGE *h;
484 u_int index;
485 {
486 BLEAF *bl;
487 indx_t cnt, *ip, offset;
488 u_int32_t nbytes;
489 void *to;
490 char *from;
491
492 /* If this record is referenced by the cursor, delete the cursor. */
493 if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
494 !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
495 t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == index &&
496 __bt_curdel(t, key, h, index))
497 return (RET_ERROR);
498
499 /* If the entry uses overflow pages, make them available for reuse. */
500 to = bl = GETBLEAF(h, index);
501 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
502 return (RET_ERROR);
503 if (bl->flags & P_BIGDATA &&
504 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
505 return (RET_ERROR);
506
507 /* Pack the remaining key/data items at the end of the page. */
508 nbytes = NBLEAF(bl);
509 from = (char *)h + h->upper;
510 memmove(from + nbytes, from, (char *)to - from);
511 h->upper += nbytes;
512
513 /* Adjust the indices' offsets, shift the indices down. */
514 offset = h->linp[index];
515 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
516 if (ip[0] < offset)
517 ip[0] += nbytes;
518 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
519 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
520 h->lower -= sizeof(indx_t);
521
522 /* If the cursor is on this page, adjust it as necessary. */
523 if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
524 !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
525 t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > index)
526 --t->bt_cursor.pg.index;
527
528 return (RET_SUCCESS);
529 }
530
531 /*
532 * __bt_curdel --
533 * Delete the cursor.
534 *
535 * Parameters:
536 * t: tree
537 * key: referenced key (or NULL)
538 * h: page
539 * index: index on page to delete
540 *
541 * Returns:
542 * RET_SUCCESS, RET_ERROR.
543 */
544 static int
545 __bt_curdel(t, key, h, index)
546 BTREE *t;
547 const DBT *key;
548 PAGE *h;
549 u_int index;
550 {
551 CURSOR *c;
552 EPG e;
553 PAGE *pg;
554 int curcopy, status;
555
556 /*
557 * If there are duplicates, move forward or backward to one.
558 * Otherwise, copy the key into the cursor area.
559 */
560 c = &t->bt_cursor;
561 F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
562
563 curcopy = 0;
564 if (!F_ISSET(t, B_NODUPS)) {
565 /*
566 * We're going to have to do comparisons. If we weren't
567 * provided a copy of the key, i.e. the user is deleting
568 * the current cursor position, get one.
569 */
570 if (key == NULL) {
571 e.page = h;
572 e.index = index;
573 if ((status = __bt_ret(t, &e,
574 &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
575 return (status);
576 curcopy = 1;
577 key = &c->key;
578 }
579 /* Check previous key, if not at the beginning of the page. */
580 if (index > 0) {
581 e.page = h;
582 e.index = index - 1;
583 if (__bt_cmp(t, key, &e) == 0) {
584 F_SET(c, CURS_BEFORE);
585 goto dup2;
586 }
587 }
588 /* Check next key, if not at the end of the page. */
589 if (index < NEXTINDEX(h) - 1) {
590 e.page = h;
591 e.index = index + 1;
592 if (__bt_cmp(t, key, &e) == 0) {
593 F_SET(c, CURS_AFTER);
594 goto dup2;
595 }
596 }
597 /* Check previous key if at the beginning of the page. */
598 if (index == 0 && h->prevpg != P_INVALID) {
599 if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
600 return (RET_ERROR);
601 e.page = pg;
602 e.index = NEXTINDEX(pg) - 1;
603 if (__bt_cmp(t, key, &e) == 0) {
604 F_SET(c, CURS_BEFORE);
605 goto dup1;
606 }
607 mpool_put(t->bt_mp, pg, 0);
608 }
609 /* Check next key if at the end of the page. */
610 if (index == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
611 if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
612 return (RET_ERROR);
613 e.page = pg;
614 e.index = 0;
615 if (__bt_cmp(t, key, &e) == 0) {
616 F_SET(c, CURS_AFTER);
617 dup1: mpool_put(t->bt_mp, pg, 0);
618 dup2: c->pg.pgno = e.page->pgno;
619 c->pg.index = e.index;
620 return (RET_SUCCESS);
621 }
622 mpool_put(t->bt_mp, pg, 0);
623 }
624 }
625 e.page = h;
626 e.index = index;
627 if (curcopy || (status =
628 __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
629 F_SET(c, CURS_ACQUIRE);
630 return (RET_SUCCESS);
631 }
632 return (status);
633 }
634
635 /*
636 * __bt_relink --
637 * Link around a deleted page.
638 *
639 * Parameters:
640 * t: tree
641 * h: page to be deleted
642 */
643 static int
644 __bt_relink(t, h)
645 BTREE *t;
646 PAGE *h;
647 {
648 PAGE *pg;
649
650 if (h->nextpg != P_INVALID) {
651 if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
652 return (RET_ERROR);
653 pg->prevpg = h->prevpg;
654 mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
655 }
656 if (h->prevpg != P_INVALID) {
657 if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
658 return (RET_ERROR);
659 pg->nextpg = h->nextpg;
660 mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
661 }
662 return (0);
663 }
664