bt_delete.c revision 1.5 1 /*-
2 * Copyright (c) 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Mike Olson.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #if defined(LIBC_SCCS) && !defined(lint)
38 static char sccsid[] = "@(#)bt_delete.c 8.4 (Berkeley) 5/31/94";
39 #endif /* LIBC_SCCS and not lint */
40
41 #include <sys/types.h>
42
43 #include <errno.h>
44 #include <stdio.h>
45 #include <string.h>
46
47 #include <db.h>
48 #include "btree.h"
49
50 static int bt_bdelete __P((BTREE *, const DBT *));
51
52 /*
53 * __BT_DELETE -- Delete the item(s) referenced by a key.
54 *
55 * Parameters:
56 * dbp: pointer to access method
57 * key: key to delete
58 * flags: R_CURSOR if deleting what the cursor references
59 *
60 * Returns:
61 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
62 */
63 int
64 __bt_delete(dbp, key, flags)
65 const DB *dbp;
66 const DBT *key;
67 u_int flags;
68 {
69 BTREE *t;
70 int status;
71
72 t = dbp->internal;
73
74 /* Toss any page pinned across calls. */
75 if (t->bt_pinned != NULL) {
76 mpool_put(t->bt_mp, t->bt_pinned, 0);
77 t->bt_pinned = NULL;
78 }
79
80 if (ISSET(t, B_RDONLY)) {
81 errno = EPERM;
82 return (RET_ERROR);
83 }
84
85 switch(flags) {
86 case 0:
87 status = bt_bdelete(t, key);
88 break;
89 case R_CURSOR:
90 /*
91 * If flags is R_CURSOR, delete the cursor; must already have
92 * started a scan and not have already deleted the record. For
93 * the delete cursor bit to have been set requires that the
94 * scan be initialized, so no reason to check.
95 */
96 if (!ISSET(t, B_SEQINIT))
97 goto einval;
98 status = ISSET(t, B_DELCRSR) ?
99 RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor);
100 break;
101 default:
102 einval: errno = EINVAL;
103 return (RET_ERROR);
104 }
105 if (status == RET_SUCCESS)
106 SET(t, B_MODIFIED);
107 return (status);
108 }
109
110 /*
111 * BT_BDELETE -- Delete all key/data pairs matching the specified key.
112 *
113 * Parameters:
114 * tree: tree
115 * key: key to delete
116 *
117 * Returns:
118 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
119 */
120 static int
121 bt_bdelete(t, key)
122 BTREE *t;
123 const DBT *key;
124 {
125 EPG *e, save;
126 PAGE *h;
127 pgno_t cpgno, pg;
128 indx_t cindex;
129 int deleted, dirty1, dirty2, exact;
130
131 /* Find any matching record; __bt_search pins the page. */
132 if ((e = __bt_search(t, key, &exact)) == NULL)
133 return (RET_ERROR);
134 if (!exact) {
135 mpool_put(t->bt_mp, e->page, 0);
136 return (RET_SPECIAL);
137 }
138
139 /*
140 * Delete forward, then delete backward, from the found key. The
141 * ordering is so that the deletions don't mess up the page refs.
142 * The first loop deletes the key from the original page, the second
143 * unpins the original page. In the first loop, dirty1 is set if
144 * the original page is modified, and dirty2 is set if any subsequent
145 * pages are modified. In the second loop, dirty1 starts off set if
146 * the original page has been modified, and is set if any subsequent
147 * pages are modified.
148 *
149 * If find the key referenced by the cursor, don't delete it, just
150 * flag it for future deletion. The cursor page number is P_INVALID
151 * unless the sequential scan is initialized, so no reason to check.
152 * A special case is when the already deleted cursor record was the
153 * only record found. If so, then the delete opertion fails as no
154 * records were deleted.
155 *
156 * Cycle in place in the current page until the current record doesn't
157 * match the key or the page is empty. If the latter, walk forward,
158 * skipping empty pages and repeating until a record doesn't match
159 * the key or the end of the tree is reached.
160 */
161 cpgno = t->bt_bcursor.pgno;
162 cindex = t->bt_bcursor.index;
163 save = *e;
164 dirty1 = 0;
165 for (h = e->page, deleted = 0;;) {
166 dirty2 = 0;
167 do {
168 if (h->pgno == cpgno && e->index == cindex) {
169 if (!ISSET(t, B_DELCRSR)) {
170 SET(t, B_DELCRSR);
171 deleted = 1;
172 }
173 ++e->index;
174 } else {
175 if (__bt_dleaf(t, h, e->index)) {
176 if (h->pgno != save.page->pgno)
177 mpool_put(t->bt_mp, h, dirty2);
178 mpool_put(t->bt_mp, save.page, dirty1);
179 return (RET_ERROR);
180 }
181 if (h->pgno == save.page->pgno)
182 dirty1 = MPOOL_DIRTY;
183 else
184 dirty2 = MPOOL_DIRTY;
185 deleted = 1;
186 }
187 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
188
189 /*
190 * Quit if didn't find a match, no next page, or first key on
191 * the next page doesn't match. Don't unpin the original page
192 * unless an error occurs.
193 */
194 if (e->index < NEXTINDEX(h))
195 break;
196 for (;;) {
197 if ((pg = h->nextpg) == P_INVALID)
198 goto done1;
199 if (h->pgno != save.page->pgno)
200 mpool_put(t->bt_mp, h, dirty2);
201 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) {
202 mpool_put(t->bt_mp, save.page, dirty1);
203 return (RET_ERROR);
204 }
205 if (NEXTINDEX(h) != 0) {
206 e->page = h;
207 e->index = 0;
208 break;
209 }
210 }
211
212 if (__bt_cmp(t, key, e) != 0)
213 break;
214 }
215
216 /*
217 * Reach here with the original page and the last page referenced
218 * pinned (they may be the same). Release it if not the original.
219 */
220 done1: if (h->pgno != save.page->pgno)
221 mpool_put(t->bt_mp, h, dirty2);
222
223 /*
224 * Walk backwards from the record previous to the record returned by
225 * __bt_search, skipping empty pages, until a record doesn't match
226 * the key or reach the beginning of the tree.
227 */
228 *e = save;
229 for (;;) {
230 if (e->index)
231 --e->index;
232 for (h = e->page; e->index; --e->index) {
233 if (__bt_cmp(t, key, e) != 0)
234 goto done2;
235 if (h->pgno == cpgno && e->index == cindex) {
236 if (!ISSET(t, B_DELCRSR)) {
237 SET(t, B_DELCRSR);
238 deleted = 1;
239 }
240 } else {
241 if (__bt_dleaf(t, h, e->index) == RET_ERROR) {
242 mpool_put(t->bt_mp, h, dirty1);
243 return (RET_ERROR);
244 }
245 if (h->pgno == save.page->pgno)
246 dirty1 = MPOOL_DIRTY;
247 deleted = 1;
248 }
249 }
250
251 if ((pg = h->prevpg) == P_INVALID)
252 goto done2;
253 mpool_put(t->bt_mp, h, dirty1);
254 dirty1 = 0;
255 if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL)
256 return (RET_ERROR);
257 e->index = NEXTINDEX(e->page);
258 }
259
260 /*
261 * Reach here with the last page that was looked at pinned. Release
262 * it.
263 */
264 done2: mpool_put(t->bt_mp, h, dirty1);
265 return (deleted ? RET_SUCCESS : RET_SPECIAL);
266 }
267
268 /*
269 * __BT_DLEAF -- Delete a single record from a leaf page.
270 *
271 * Parameters:
272 * t: tree
273 * index: index on current page to delete
274 *
275 * Returns:
276 * RET_SUCCESS, RET_ERROR.
277 */
278 int
279 __bt_dleaf(t, h, index)
280 BTREE *t;
281 PAGE *h;
282 indx_t index;
283 {
284 register BLEAF *bl;
285 register indx_t cnt, *ip, offset;
286 register u_int32_t nbytes;
287 char *from;
288 void *to;
289
290 /*
291 * Delete a record from a btree leaf page. Internal records are never
292 * deleted from internal pages, regardless of the records that caused
293 * them to be added being deleted. Pages made empty by deletion are
294 * not reclaimed. They are, however, made available for reuse.
295 *
296 * Pack the remaining entries at the end of the page, shift the indices
297 * down, overwriting the deleted record and its index. If the record
298 * uses overflow pages, make them available for reuse.
299 */
300 to = bl = GETBLEAF(h, index);
301 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
302 return (RET_ERROR);
303 if (bl->flags & P_BIGDATA &&
304 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
305 return (RET_ERROR);
306 nbytes = NBLEAF(bl);
307
308 /*
309 * Compress the key/data pairs. Compress and adjust the [BR]LEAF
310 * offsets. Reset the headers.
311 */
312 from = (char *)h + h->upper;
313 memmove(from + nbytes, from, (char *)to - from);
314 h->upper += nbytes;
315
316 offset = h->linp[index];
317 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
318 if (ip[0] < offset)
319 ip[0] += nbytes;
320 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
321 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
322 h->lower -= sizeof(indx_t);
323 return (RET_SUCCESS);
324 }
325