bt_seq.c revision 1.5 1 /*-
2 * Copyright (c) 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Mike Olson.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #if defined(LIBC_SCCS) && !defined(lint)
38 static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 9/7/93";
39 #endif /* LIBC_SCCS and not lint */
40
41 #include <sys/types.h>
42
43 #include <errno.h>
44 #include <stddef.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47
48 #include <db.h>
49 #include "btree.h"
50
51 static int bt_seqadv __P((BTREE *, EPG *, int));
52 static int bt_seqset __P((BTREE *, EPG *, DBT *, int));
53
54 /*
55 * Sequential scan support.
56 *
57 * The tree can be scanned sequentially, starting from either end of the tree
58 * or from any specific key. A scan request before any scanning is done is
59 * initialized as starting from the least node.
60 *
61 * Each tree has an EPGNO which has the current position of the cursor. The
62 * cursor has to survive deletions/insertions in the tree without losing its
63 * position. This is done by noting deletions without doing them, and then
64 * doing them when the cursor moves (or the tree is closed).
65 */
66
67 /*
68 * __BT_SEQ -- Btree sequential scan interface.
69 *
70 * Parameters:
71 * dbp: pointer to access method
72 * key: key for positioning and return value
73 * data: data return value
74 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
75 *
76 * Returns:
77 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
78 */
79 int
80 __bt_seq(dbp, key, data, flags)
81 const DB *dbp;
82 DBT *key, *data;
83 u_int flags;
84 {
85 BTREE *t;
86 EPG e;
87 int status;
88
89 t = dbp->internal;
90
91 /* Toss any page pinned across calls. */
92 if (t->bt_pinned != NULL) {
93 mpool_put(t->bt_mp, t->bt_pinned, 0);
94 t->bt_pinned = NULL;
95 }
96
97 /*
98 * If scan unitialized as yet, or starting at a specific record, set
99 * the scan to a specific key. Both bt_seqset and bt_seqadv pin the
100 * page the cursor references if they're successful.
101 */
102 switch(flags) {
103 case R_NEXT:
104 case R_PREV:
105 if (ISSET(t, B_SEQINIT)) {
106 status = bt_seqadv(t, &e, flags);
107 break;
108 }
109 /* FALLTHROUGH */
110 case R_CURSOR:
111 case R_FIRST:
112 case R_LAST:
113 status = bt_seqset(t, &e, key, flags);
114 break;
115 default:
116 errno = EINVAL;
117 return (RET_ERROR);
118 }
119
120 if (status == RET_SUCCESS) {
121 status = __bt_ret(t, &e, key, data);
122
123 /* Update the actual cursor. */
124 t->bt_bcursor.pgno = e.page->pgno;
125 t->bt_bcursor.index = e.index;
126
127 /*
128 * If the user is doing concurrent access, we copied the
129 * key/data, toss the page.
130 */
131 if (ISSET(t, B_DB_LOCK))
132 mpool_put(t->bt_mp, e.page, 0);
133 else
134 t->bt_pinned = e.page;
135 SET(t, B_SEQINIT);
136 }
137 return (status);
138 }
139
140 /*
141 * BT_SEQSET -- Set the sequential scan to a specific key.
142 *
143 * Parameters:
144 * t: tree
145 * ep: storage for returned key
146 * key: key for initial scan position
147 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
148 *
149 * Side effects:
150 * Pins the page the cursor references.
151 *
152 * Returns:
153 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
154 */
155 static int
156 bt_seqset(t, ep, key, flags)
157 BTREE *t;
158 EPG *ep;
159 DBT *key;
160 int flags;
161 {
162 EPG *e;
163 PAGE *h;
164 pgno_t pg;
165 int exact;
166
167 /*
168 * Delete any already deleted record that we've been saving because
169 * the cursor pointed to it. Since going to a specific key, should
170 * delete any logically deleted records so they aren't found.
171 */
172 if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor))
173 return (RET_ERROR);
174
175 /*
176 * Find the first, last or specific key in the tree and point the cursor
177 * at it. The cursor may not be moved until a new key has been found.
178 */
179 switch(flags) {
180 case R_CURSOR: /* Keyed scan. */
181 /*
182 * Find the first instance of the key or the smallest key which
183 * is greater than or equal to the specified key. If run out
184 * of keys, return RET_SPECIAL.
185 */
186 if (key->data == NULL || key->size == 0) {
187 errno = EINVAL;
188 return (RET_ERROR);
189 }
190 e = __bt_first(t, key, &exact); /* Returns pinned page. */
191 if (e == NULL)
192 return (RET_ERROR);
193 /*
194 * If at the end of a page, skip any empty pages and find the
195 * next entry.
196 */
197 if (e->index == NEXTINDEX(e->page)) {
198 h = e->page;
199 do {
200 pg = h->nextpg;
201 mpool_put(t->bt_mp, h, 0);
202 if (pg == P_INVALID)
203 return (RET_SPECIAL);
204 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
205 return (RET_ERROR);
206 } while (NEXTINDEX(h) == 0);
207 e->index = 0;
208 e->page = h;
209 }
210 *ep = *e;
211 break;
212 case R_FIRST: /* First record. */
213 case R_NEXT:
214 /* Walk down the left-hand side of the tree. */
215 for (pg = P_ROOT;;) {
216 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
217 return (RET_ERROR);
218 if (h->flags & (P_BLEAF | P_RLEAF))
219 break;
220 pg = GETBINTERNAL(h, 0)->pgno;
221 mpool_put(t->bt_mp, h, 0);
222 }
223
224 /* Skip any empty pages. */
225 while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) {
226 pg = h->nextpg;
227 mpool_put(t->bt_mp, h, 0);
228 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
229 return (RET_ERROR);
230 }
231
232 if (NEXTINDEX(h) == 0) {
233 mpool_put(t->bt_mp, h, 0);
234 return (RET_SPECIAL);
235 }
236
237 ep->page = h;
238 ep->index = 0;
239 break;
240 case R_LAST: /* Last record. */
241 case R_PREV:
242 /* Walk down the right-hand side of the tree. */
243 for (pg = P_ROOT;;) {
244 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
245 return (RET_ERROR);
246 if (h->flags & (P_BLEAF | P_RLEAF))
247 break;
248 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
249 mpool_put(t->bt_mp, h, 0);
250 }
251
252 /* Skip any empty pages. */
253 while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) {
254 pg = h->prevpg;
255 mpool_put(t->bt_mp, h, 0);
256 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
257 return (RET_ERROR);
258 }
259
260 if (NEXTINDEX(h) == 0) {
261 mpool_put(t->bt_mp, h, 0);
262 return (RET_SPECIAL);
263 }
264
265 ep->page = h;
266 ep->index = NEXTINDEX(h) - 1;
267 break;
268 }
269 return (RET_SUCCESS);
270 }
271
272 /*
273 * BT_SEQADVANCE -- Advance the sequential scan.
274 *
275 * Parameters:
276 * t: tree
277 * flags: R_NEXT, R_PREV
278 *
279 * Side effects:
280 * Pins the page the new key/data record is on.
281 *
282 * Returns:
283 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
284 */
285 static int
286 bt_seqadv(t, e, flags)
287 BTREE *t;
288 EPG *e;
289 int flags;
290 {
291 EPGNO *c, delc;
292 PAGE *h;
293 indx_t index;
294 pgno_t pg;
295
296 /* Save the current cursor if going to delete it. */
297 c = &t->bt_bcursor;
298 if (ISSET(t, B_DELCRSR))
299 delc = *c;
300
301 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
302 return (RET_ERROR);
303
304 /*
305 * Find the next/previous record in the tree and point the cursor at it.
306 * The cursor may not be moved until a new key has been found.
307 */
308 index = c->index;
309 switch(flags) {
310 case R_NEXT: /* Next record. */
311 if (++index == NEXTINDEX(h)) {
312 do {
313 pg = h->nextpg;
314 mpool_put(t->bt_mp, h, 0);
315 if (pg == P_INVALID)
316 return (RET_SPECIAL);
317 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
318 return (RET_ERROR);
319 } while (NEXTINDEX(h) == 0);
320 index = 0;
321 }
322 break;
323 case R_PREV: /* Previous record. */
324 if (index-- == 0) {
325 do {
326 pg = h->prevpg;
327 mpool_put(t->bt_mp, h, 0);
328 if (pg == P_INVALID)
329 return (RET_SPECIAL);
330 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
331 return (RET_ERROR);
332 } while (NEXTINDEX(h) == 0);
333 index = NEXTINDEX(h) - 1;
334 }
335 break;
336 }
337
338 e->page = h;
339 e->index = index;
340
341 /*
342 * Delete any already deleted record that we've been saving because the
343 * cursor pointed to it. This could cause the new index to be shifted
344 * down by one if the record we're deleting is on the same page and has
345 * a larger index.
346 */
347 if (ISSET(t, B_DELCRSR)) {
348 CLR(t, B_DELCRSR); /* Don't try twice. */
349 if (c->pgno == delc.pgno && c->index > delc.index)
350 --c->index;
351 if (__bt_crsrdel(t, &delc))
352 return (RET_ERROR);
353 }
354 return (RET_SUCCESS);
355 }
356
357 /*
358 * __BT_CRSRDEL -- Delete the record referenced by the cursor.
359 *
360 * Parameters:
361 * t: tree
362 *
363 * Returns:
364 * RET_ERROR, RET_SUCCESS
365 */
366 int
367 __bt_crsrdel(t, c)
368 BTREE *t;
369 EPGNO *c;
370 {
371 PAGE *h;
372 int status;
373
374 CLR(t, B_DELCRSR); /* Don't try twice. */
375 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
376 return (RET_ERROR);
377 status = __bt_dleaf(t, h, c->index);
378 mpool_put(t->bt_mp, h, MPOOL_DIRTY);
379 return (status);
380 }
381