bt_delete.c revision 1.3 1 /*-
2 * Copyright (c) 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Mike Olson.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #if defined(LIBC_SCCS) && !defined(lint)
38 /*static char *sccsid = "from: @(#)bt_delete.c 8.1 (Berkeley) 6/4/93";*/
39 static char *rcsid = "$Id: bt_delete.c,v 1.3 1993/08/26 00:43:19 jtc Exp $";
40 #endif /* LIBC_SCCS and not lint */
41
42 #include <sys/types.h>
43
44 #include <errno.h>
45 #include <stdio.h>
46 #include <string.h>
47
48 #include <db.h>
49 #include "btree.h"
50
51 static int bt_bdelete __P((BTREE *, const DBT *));
52
53 /*
54 * __BT_DELETE -- Delete the item(s) referenced by a key.
55 *
56 * Parameters:
57 * dbp: pointer to access method
58 * key: key to delete
59 * flags: R_CURSOR if deleting what the cursor references
60 *
61 * Returns:
62 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
63 */
64 int
65 __bt_delete(dbp, key, flags)
66 const DB *dbp;
67 const DBT *key;
68 u_int flags;
69 {
70 BTREE *t;
71 int status;
72
73 t = dbp->internal;
74 if (ISSET(t, B_RDONLY)) {
75 errno = EPERM;
76 return (RET_ERROR);
77 }
78 switch(flags) {
79 case 0:
80 status = bt_bdelete(t, key);
81 break;
82 case R_CURSOR:
83 /*
84 * If flags is R_CURSOR, delete the cursor; must already have
85 * started a scan and not have already deleted the record. For
86 * the delete cursor bit to have been set requires that the
87 * scan be initialized, so no reason to check.
88 */
89 if (!ISSET(t, B_SEQINIT))
90 goto einval;
91 status = ISSET(t, B_DELCRSR) ?
92 RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor);
93 break;
94 default:
95 einval: errno = EINVAL;
96 return (RET_ERROR);
97 }
98 if (status == RET_SUCCESS)
99 SET(t, B_MODIFIED);
100 return (status);
101 }
102
103 /*
104 * BT_BDELETE -- Delete all key/data pairs matching the specified key.
105 *
106 * Parameters:
107 * tree: tree
108 * key: key to delete
109 *
110 * Returns:
111 * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
112 */
113 static int
114 bt_bdelete(t, key)
115 BTREE *t;
116 const DBT *key;
117 {
118 EPG *e, save;
119 PAGE *h;
120 pgno_t cpgno, pg;
121 indx_t cindex;
122 int deleted, dirty1, dirty2, exact;
123
124 /* Find any matching record; __bt_search pins the page. */
125 if ((e = __bt_search(t, key, &exact)) == NULL)
126 return (RET_ERROR);
127 if (!exact) {
128 mpool_put(t->bt_mp, e->page, 0);
129 return (RET_SPECIAL);
130 }
131
132 /*
133 * Delete forward, then delete backward, from the found key. The
134 * ordering is so that the deletions don't mess up the page refs.
135 * The first loop deletes the key from the original page, the second
136 * unpins the original page. In the first loop, dirty1 is set if
137 * the original page is modified, and dirty2 is set if any subsequent
138 * pages are modified. In the second loop, dirty1 starts off set if
139 * the original page has been modified, and is set if any subsequent
140 * pages are modified.
141 *
142 * If find the key referenced by the cursor, don't delete it, just
143 * flag it for future deletion. The cursor page number is P_INVALID
144 * unless the sequential scan is initialized, so no reason to check.
145 * A special case is when the already deleted cursor record was the
146 * only record found. If so, then the delete opertion fails as no
147 * records were deleted.
148 *
149 * Cycle in place in the current page until the current record doesn't
150 * match the key or the page is empty. If the latter, walk forward,
151 * skipping empty pages and repeating until a record doesn't match
152 * the key or the end of the tree is reached.
153 */
154 cpgno = t->bt_bcursor.pgno;
155 cindex = t->bt_bcursor.index;
156 save = *e;
157 dirty1 = 0;
158 for (h = e->page, deleted = 0;;) {
159 dirty2 = 0;
160 do {
161 if (h->pgno == cpgno && e->index == cindex) {
162 if (!ISSET(t, B_DELCRSR)) {
163 SET(t, B_DELCRSR);
164 deleted = 1;
165 }
166 ++e->index;
167 } else {
168 if (__bt_dleaf(t, h, e->index)) {
169 if (h->pgno != save.page->pgno)
170 mpool_put(t->bt_mp, h, dirty2);
171 mpool_put(t->bt_mp, save.page, dirty1);
172 return (RET_ERROR);
173 }
174 if (h->pgno == save.page->pgno)
175 dirty1 = MPOOL_DIRTY;
176 else
177 dirty2 = MPOOL_DIRTY;
178 deleted = 1;
179 }
180 } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
181
182 /*
183 * Quit if didn't find a match, no next page, or first key on
184 * the next page doesn't match. Don't unpin the original page
185 * unless an error occurs.
186 */
187 if (e->index < NEXTINDEX(h))
188 break;
189 for (;;) {
190 if ((pg = h->nextpg) == P_INVALID)
191 goto done1;
192 if (h->pgno != save.page->pgno)
193 mpool_put(t->bt_mp, h, dirty2);
194 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) {
195 mpool_put(t->bt_mp, save.page, dirty1);
196 return (RET_ERROR);
197 }
198 if (NEXTINDEX(h) != 0) {
199 e->page = h;
200 e->index = 0;
201 break;
202 }
203 }
204
205 if (__bt_cmp(t, key, e) != 0)
206 break;
207 }
208
209 /*
210 * Reach here with the original page and the last page referenced
211 * pinned (they may be the same). Release it if not the original.
212 */
213 done1: if (h->pgno != save.page->pgno)
214 mpool_put(t->bt_mp, h, dirty2);
215
216 /*
217 * Walk backwards from the record previous to the record returned by
218 * __bt_search, skipping empty pages, until a record doesn't match
219 * the key or reach the beginning of the tree.
220 */
221 *e = save;
222 for (;;) {
223 if (e->index)
224 --e->index;
225 for (h = e->page; e->index; --e->index) {
226 if (__bt_cmp(t, key, e) != 0)
227 goto done2;
228 if (h->pgno == cpgno && e->index == cindex) {
229 if (!ISSET(t, B_DELCRSR)) {
230 SET(t, B_DELCRSR);
231 deleted = 1;
232 }
233 } else {
234 if (__bt_dleaf(t, h, e->index) == RET_ERROR) {
235 mpool_put(t->bt_mp, h, dirty1);
236 return (RET_ERROR);
237 }
238 if (h->pgno == save.page->pgno)
239 dirty1 = MPOOL_DIRTY;
240 deleted = 1;
241 }
242 }
243
244 if ((pg = h->prevpg) == P_INVALID)
245 goto done2;
246 mpool_put(t->bt_mp, h, dirty1);
247 dirty1 = 0;
248 if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL)
249 return (RET_ERROR);
250 e->index = NEXTINDEX(e->page);
251 }
252
253 /*
254 * Reach here with the last page that was looked at pinned. Release
255 * it.
256 */
257 done2: mpool_put(t->bt_mp, h, dirty1);
258 return (deleted ? RET_SUCCESS : RET_SPECIAL);
259 }
260
261 /*
262 * __BT_DLEAF -- Delete a single record from a leaf page.
263 *
264 * Parameters:
265 * t: tree
266 * index: index on current page to delete
267 *
268 * Returns:
269 * RET_SUCCESS, RET_ERROR.
270 */
271 int
272 __bt_dleaf(t, h, index)
273 BTREE *t;
274 PAGE *h;
275 int index;
276 {
277 register BLEAF *bl;
278 register indx_t *ip, offset;
279 register size_t nbytes;
280 register int cnt;
281 char *from;
282 void *to;
283
284 /*
285 * Delete a record from a btree leaf page. Internal records are never
286 * deleted from internal pages, regardless of the records that caused
287 * them to be added being deleted. Pages made empty by deletion are
288 * not reclaimed. They are, however, made available for reuse.
289 *
290 * Pack the remaining entries at the end of the page, shift the indices
291 * down, overwriting the deleted record and its index. If the record
292 * uses overflow pages, make them available for reuse.
293 */
294 to = bl = GETBLEAF(h, index);
295 if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
296 return (RET_ERROR);
297 if (bl->flags & P_BIGDATA &&
298 __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
299 return (RET_ERROR);
300 nbytes = NBLEAF(bl);
301
302 /*
303 * Compress the key/data pairs. Compress and adjust the [BR]LEAF
304 * offsets. Reset the headers.
305 */
306 from = (char *)h + h->upper;
307 memmove(from + nbytes, from, (char *)to - from);
308 h->upper += nbytes;
309
310 offset = h->linp[index];
311 for (cnt = index, ip = &h->linp[0]; cnt--; ++ip)
312 if (ip[0] < offset)
313 ip[0] += nbytes;
314 for (cnt = NEXTINDEX(h) - index; --cnt; ++ip)
315 ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
316 h->lower -= sizeof(indx_t);
317 return (RET_SUCCESS);
318 }
319