bt_delete.c revision 1.4 1 /*-
2 * Copyright (c) 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Mike Olson.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #if defined(LIBC_SCCS) && !defined(lint)
38 /* from: static char sccsid[] = "@(#)bt_delete.c 8.2 (Berkeley) 9/7/93"; */
39 static char *rcsid = "$Id: bt_delete.c,v 1.4 1993/09/09 02:41:22 cgd Exp $";
40 #endif /* LIBC_SCCS and not lint */
41
42 #include <sys/types.h>
43
44 #include <errno.h>
45 #include <stdio.h>
46 #include <string.h>
47
48 #include <db.h>
49 #include "btree.h"
50
51 static int bt_bdelete __P((BTREE *, const DBT *));
52
53 /*
54 * __BT_DELETE -- Delete the item(s) referenced by a key.
55 *
56 * Parameters:
57 * dbp: pointer to access method
58 * key: key to delete
59 * flags: R_CURSOR if deleting what the cursor references
60 *
61 * Returns:
62 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
63 */
64 int
65 __bt_delete(dbp, key, flags)
66 const DB *dbp;
67 const DBT *key;
68 u_int flags;
69 {
70 BTREE *t;
71 int status;
72
73 t = dbp->internal;
74
75 /* Toss any page pinned across calls. */
76 if (t->bt_pinned != NULL) {
77 mpool_put(t->bt_mp, t->bt_pinned, 0);
78 t->bt_pinned = NULL;
79 }
80
81 if (ISSET(t, B_RDONLY)) {
82 errno = EPERM;
83 return (RET_ERROR);
84 }
85
86 switch(flags) {
87 case 0:
88 status = bt_bdelete(t, key);
89 break;
90 case R_CURSOR:
91 /*
92 * If flags is R_CURSOR, delete the cursor; must already have
93 * started a scan and not have already deleted the record. For
94 * the delete cursor bit to have been set requires that the
95 * scan be initialized, so no reason to check.
96 */
97 if (!ISSET(t, B_SEQINIT))
98 goto einval;
99 status = ISSET(t, B_DELCRSR) ?
100 RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor);
101 break;
102 default:
103 einval: errno = EINVAL;
104 return (RET_ERROR);
105 }
106 if (status == RET_SUCCESS)
107 SET(t, B_MODIFIED);
108 return (status);
109 }
110
111 /*
112 * BT_BDELETE -- Delete all key/data pairs matching the specified key.
113 *
114 * Parameters:
115 * tree: tree
116 * key: key to delete
117 *
118 * Returns:
119 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
120 */
121 static int
122 bt_bdelete(t, key)
123 BTREE *t;
124 const DBT *key;
125 {
126 EPG *e, save;
127 PAGE *h;
128 pgno_t cpgno, pg;
129 indx_t cindex;
130 int deleted, dirty1, dirty2, exact;
131
132 /* Find any matching record; __bt_search pins the page. */
133 if ((e = __bt_search(t, key, &exact)) == NULL)
134 return (RET_ERROR);
135 if (!exact) {
136 mpool_put(t->bt_mp, e->page, 0);
137 return (RET_SPECIAL);
138 }
139
140 /*
141 * Delete forward, then delete backward, from the found key. The
142 * ordering is so that the deletions don't mess up the page refs.
143 * The first loop deletes the key from the original page, the second
144 * unpins the original page. In the first loop, dirty1 is set if
145 * the original page is modified, and dirty2 is set if any subsequent
146 * pages are modified. In the second loop, dirty1 starts off set if
147 * the original page has been modified, and is set if any subsequent
148 * pages are modified.
149 *
150 * If find the key referenced by the cursor, don't delete it, just
151 * flag it for future deletion. The cursor page number is P_INVALID
152 * unless the sequential scan is initialized, so no reason to check.
153 * A special case is when the already deleted cursor record was the
154 * only record found. If so, then the delete opertion fails as no
155 * records were deleted.
156 *
157 * Cycle in place in the current page until the current record doesn't
158 * match the key or the page is empty. If the latter, walk forward,
159 * skipping empty pages and repeating until a record doesn't match
160 * the key or the end of the tree is reached.
161 */
162 cpgno = t->bt_bcursor.pgno;
163 cindex = t->bt_bcursor.index;
164 save = *e;
165 dirty1 = 0;
166 for (h = e->page, deleted = 0;;) {
167 dirty2 = 0;
168 do {
169 if (h->pgno == cpgno && e->index == cindex) {
170 if (!ISSET(t, B_DELCRSR)) {
171 SET(t, B_DELCRSR);
172 deleted = 1;
173 }
174 ++e->index;
175 } else {
176 if (__bt_dleaf(t, h, e->index)) {
177 if (h->pgno != save.page->pgno)
178 mpool_put(t->bt_mp, h, dirty2);
179 mpool_put(t->bt_mp, save.page, dirty1);
180 return (RET_ERROR);
181 }
182 if (h->pgno == save.page->pgno)
183 dirty1 = MPOOL_DIRTY;
184 else
185 dirty2 = MPOOL_DIRTY;
186 deleted = 1;
187 }
188 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
189
190 /*
191 * Quit if didn't find a match, no next page, or first key on
192 * the next page doesn't match. Don't unpin the original page
193 * unless an error occurs.
194 */
195 if (e->index < NEXTINDEX(h))
196 break;
197 for (;;) {
198 if ((pg = h->nextpg) == P_INVALID)
199 goto done1;
200 if (h->pgno != save.page->pgno)
201 mpool_put(t->bt_mp, h, dirty2);
202 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) {
203 mpool_put(t->bt_mp, save.page, dirty1);
204 return (RET_ERROR);
205 }
206 if (NEXTINDEX(h) != 0) {
207 e->page = h;
208 e->index = 0;
209 break;
210 }
211 }
212
213 if (__bt_cmp(t, key, e) != 0)
214 break;
215 }
216
217 /*
218 * Reach here with the original page and the last page referenced
219 * pinned (they may be the same). Release it if not the original.
220 */
221 done1: if (h->pgno != save.page->pgno)
222 mpool_put(t->bt_mp, h, dirty2);
223
224 /*
225 * Walk backwards from the record previous to the record returned by
226 * __bt_search, skipping empty pages, until a record doesn't match
227 * the key or reach the beginning of the tree.
228 */
229 *e = save;
230 for (;;) {
231 if (e->index)
232 --e->index;
233 for (h = e->page; e->index; --e->index) {
234 if (__bt_cmp(t, key, e) != 0)
235 goto done2;
236 if (h->pgno == cpgno && e->index == cindex) {
237 if (!ISSET(t, B_DELCRSR)) {
238 SET(t, B_DELCRSR);
239 deleted = 1;
240 }
241 } else {
242 if (__bt_dleaf(t, h, e->index) == RET_ERROR) {
243 mpool_put(t->bt_mp, h, dirty1);
244 return (RET_ERROR);
245 }
246 if (h->pgno == save.page->pgno)
247 dirty1 = MPOOL_DIRTY;
248 deleted = 1;
249 }
250 }
251
252 if ((pg = h->prevpg) == P_INVALID)
253 goto done2;
254 mpool_put(t->bt_mp, h, dirty1);
255 dirty1 = 0;
256 if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL)
257 return (RET_ERROR);
258 e->index = NEXTINDEX(e->page);
259 }
260
261 /*
262 * Reach here with the last page that was looked at pinned. Release
263 * it.
264 */
265 done2: mpool_put(t->bt_mp, h, dirty1);
266 return (deleted ? RET_SUCCESS : RET_SPECIAL);
267 }
268
269 /*
270 * __BT_DLEAF -- Delete a single record from a leaf page.
271 *
272 * Parameters:
273 * t: tree
274 * index: index on current page to delete
275 *
276 * Returns:
277 * RET_SUCCESS, RET_ERROR.
278 */
279 int
280 __bt_dleaf(t, h, index)
281 BTREE *t;
282 PAGE *h;
283 int index;
284 {
285 register BLEAF *bl;
286 register indx_t *ip, offset;
287 register size_t nbytes;
288 register int cnt;
289 char *from;
290 void *to;
291
292 /*
293 * Delete a record from a btree leaf page. Internal records are never
294 * deleted from internal pages, regardless of the records that caused
295 * them to be added being deleted. Pages made empty by deletion are
296 * not reclaimed. They are, however, made available for reuse.
297 *
298 * Pack the remaining entries at the end of the page, shift the indices
299 * down, overwriting the deleted record and its index. If the record
300 * uses overflow pages, make them available for reuse.
301 */
302 to = bl = GETBLEAF(h, index);
303 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
304 return (RET_ERROR);
305 if (bl->flags & P_BIGDATA &&
306 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
307 return (RET_ERROR);
308 nbytes = NBLEAF(bl);
309
310 /*
311 * Compress the key/data pairs. Compress and adjust the [BR]LEAF
312 * offsets. Reset the headers.
313 */
314 from = (char *)h + h->upper;
315 memmove(from + nbytes, from, (char *)to - from);
316 h->upper += nbytes;
317
318 offset = h->linp[index];
319 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
320 if (ip[0] < offset)
321 ip[0] += nbytes;
322 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
323 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
324 h->lower -= sizeof(indx_t);
325 return (RET_SUCCESS);
326 }
327