bt_seq.c revision 1.1 1 /*-
2 * Copyright (c) 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Mike Olson.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #if defined(LIBC_SCCS) && !defined(lint)
38 static char sccsid[] = "@(#)bt_seq.c 8.1 (Berkeley) 6/4/93";
39 #endif /* LIBC_SCCS and not lint */
40
41 #include <sys/types.h>
42
43 #include <errno.h>
44 #include <stddef.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47
48 #include <db.h>
49 #include "btree.h"
50
51 static int bt_seqadv __P((BTREE *, EPG *, int));
52 static int bt_seqset __P((BTREE *, EPG *, DBT *, int));
53
54 /*
55 * Sequential scan support.
56 *
57 * The tree can be scanned sequentially, starting from either end of the tree
58 * or from any specific key. A scan request before any scanning is done is
59 * initialized as starting from the least node.
60 *
61 * Each tree has an EPGNO which has the current position of the cursor. The
62 * cursor has to survive deletions/insertions in the tree without losing its
63 * position. This is done by noting deletions without doing them, and then
64 * doing them when the cursor moves (or the tree is closed).
65 */
66
67 /*
68 * __BT_SEQ -- Btree sequential scan interface.
69 *
70 * Parameters:
71 * dbp: pointer to access method
72 * key: key for positioning and return value
73 * data: data return value
74 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
75 *
76 * Returns:
77 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
78 */
79 int
80 __bt_seq(dbp, key, data, flags)
81 const DB *dbp;
82 DBT *key, *data;
83 u_int flags;
84 {
85 BTREE *t;
86 EPG e;
87 int status;
88
89 /*
90 * If scan unitialized as yet, or starting at a specific record, set
91 * the scan to a specific key. Both bt_seqset and bt_seqadv pin the
92 * page the cursor references if they're successful.
93 */
94 t = dbp->internal;
95 switch(flags) {
96 case R_NEXT:
97 case R_PREV:
98 if (ISSET(t, B_SEQINIT)) {
99 status = bt_seqadv(t, &e, flags);
100 break;
101 }
102 /* FALLTHROUGH */
103 case R_CURSOR:
104 case R_FIRST:
105 case R_LAST:
106 status = bt_seqset(t, &e, key, flags);
107 break;
108 default:
109 errno = EINVAL;
110 return (RET_ERROR);
111 }
112
113 if (status == RET_SUCCESS) {
114 status = __bt_ret(t, &e, key, data);
115
116 /* Update the actual cursor. */
117 t->bt_bcursor.pgno = e.page->pgno;
118 t->bt_bcursor.index = e.index;
119 mpool_put(t->bt_mp, e.page, 0);
120 SET(t, B_SEQINIT);
121 }
122 return (status);
123 }
124
125 /*
126 * BT_SEQSET -- Set the sequential scan to a specific key.
127 *
128 * Parameters:
129 * t: tree
130 * ep: storage for returned key
131 * key: key for initial scan position
132 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
133 *
134 * Side effects:
135 * Pins the page the cursor references.
136 *
137 * Returns:
138 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
139 */
140 static int
141 bt_seqset(t, ep, key, flags)
142 BTREE *t;
143 EPG *ep;
144 DBT *key;
145 int flags;
146 {
147 EPG *e;
148 PAGE *h;
149 pgno_t pg;
150 int exact;
151
152 /*
153 * Delete any already deleted record that we've been saving because
154 * the cursor pointed to it. Since going to a specific key, should
155 * delete any logically deleted records so they aren't found.
156 */
157 if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor))
158 return (RET_ERROR);
159
160 /*
161 * Find the first, last or specific key in the tree and point the cursor
162 * at it. The cursor may not be moved until a new key has been found.
163 */
164 switch(flags) {
165 case R_CURSOR: /* Keyed scan. */
166 /*
167 * Find the first instance of the key or the smallest key which
168 * is greater than or equal to the specified key. If run out
169 * of keys, return RET_SPECIAL.
170 */
171 if (key->data == NULL || key->size == 0) {
172 errno = EINVAL;
173 return (RET_ERROR);
174 }
175 e = __bt_first(t, key, &exact); /* Returns pinned page. */
176 if (e == NULL)
177 return (RET_ERROR);
178 /*
179 * If at the end of a page, skip any empty pages and find the
180 * next entry.
181 */
182 if (e->index == NEXTINDEX(e->page)) {
183 h = e->page;
184 do {
185 pg = h->nextpg;
186 mpool_put(t->bt_mp, h, 0);
187 if (pg == P_INVALID)
188 return (RET_SPECIAL);
189 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
190 return (RET_ERROR);
191 } while (NEXTINDEX(h) == 0);
192 e->index = 0;
193 e->page = h;
194 }
195 *ep = *e;
196 break;
197 case R_FIRST: /* First record. */
198 case R_NEXT:
199 /* Walk down the left-hand side of the tree. */
200 for (pg = P_ROOT;;) {
201 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
202 return (RET_ERROR);
203 if (h->flags & (P_BLEAF | P_RLEAF))
204 break;
205 pg = GETBINTERNAL(h, 0)->pgno;
206 mpool_put(t->bt_mp, h, 0);
207 }
208
209 /* Skip any empty pages. */
210 while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) {
211 pg = h->nextpg;
212 mpool_put(t->bt_mp, h, 0);
213 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
214 return (RET_ERROR);
215 }
216
217 if (NEXTINDEX(h) == 0) {
218 mpool_put(t->bt_mp, h, 0);
219 return (RET_SPECIAL);
220 }
221
222 ep->page = h;
223 ep->index = 0;
224 break;
225 case R_LAST: /* Last record. */
226 case R_PREV:
227 /* Walk down the right-hand side of the tree. */
228 for (pg = P_ROOT;;) {
229 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
230 return (RET_ERROR);
231 if (h->flags & (P_BLEAF | P_RLEAF))
232 break;
233 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
234 mpool_put(t->bt_mp, h, 0);
235 }
236
237 /* Skip any empty pages. */
238 while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) {
239 pg = h->prevpg;
240 mpool_put(t->bt_mp, h, 0);
241 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
242 return (RET_ERROR);
243 }
244
245 if (NEXTINDEX(h) == 0) {
246 mpool_put(t->bt_mp, h, 0);
247 return (RET_SPECIAL);
248 }
249
250 ep->page = h;
251 ep->index = NEXTINDEX(h) - 1;
252 break;
253 }
254 return (RET_SUCCESS);
255 }
256
257 /*
258 * BT_SEQADVANCE -- Advance the sequential scan.
259 *
260 * Parameters:
261 * t: tree
262 * flags: R_NEXT, R_PREV
263 *
264 * Side effects:
265 * Pins the page the new key/data record is on.
266 *
267 * Returns:
268 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
269 */
270 static int
271 bt_seqadv(t, e, flags)
272 BTREE *t;
273 EPG *e;
274 int flags;
275 {
276 EPGNO *c, delc;
277 PAGE *h;
278 indx_t index;
279 pgno_t pg;
280
281 /* Save the current cursor if going to delete it. */
282 c = &t->bt_bcursor;
283 if (ISSET(t, B_DELCRSR))
284 delc = *c;
285
286 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
287 return (RET_ERROR);
288
289 /*
290 * Find the next/previous record in the tree and point the cursor at it.
291 * The cursor may not be moved until a new key has been found.
292 */
293 index = c->index;
294 switch(flags) {
295 case R_NEXT: /* Next record. */
296 if (++index == NEXTINDEX(h)) {
297 do {
298 pg = h->nextpg;
299 mpool_put(t->bt_mp, h, 0);
300 if (pg == P_INVALID)
301 return (RET_SPECIAL);
302 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
303 return (RET_ERROR);
304 } while (NEXTINDEX(h) == 0);
305 index = 0;
306 }
307 break;
308 case R_PREV: /* Previous record. */
309 if (index-- == 0) {
310 do {
311 pg = h->prevpg;
312 mpool_put(t->bt_mp, h, 0);
313 if (pg == P_INVALID)
314 return (RET_SPECIAL);
315 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
316 return (RET_ERROR);
317 } while (NEXTINDEX(h) == 0);
318 index = NEXTINDEX(h) - 1;
319 }
320 break;
321 }
322
323 e->page = h;
324 e->index = index;
325
326 /*
327 * Delete any already deleted record that we've been saving because the
328 * cursor pointed to it. This could cause the new index to be shifted
329 * down by one if the record we're deleting is on the same page and has
330 * a larger index.
331 */
332 if (ISSET(t, B_DELCRSR)) {
333 CLR(t, B_DELCRSR); /* Don't try twice. */
334 if (c->pgno == delc.pgno && c->index > delc.index)
335 --c->index;
336 if (__bt_crsrdel(t, &delc))
337 return (RET_ERROR);
338 }
339 return (RET_SUCCESS);
340 }
341
342 /*
343 * __BT_CRSRDEL -- Delete the record referenced by the cursor.
344 *
345 * Parameters:
346 * t: tree
347 *
348 * Returns:
349 * RET_ERROR, RET_SUCCESS
350 */
351 int
352 __bt_crsrdel(t, c)
353 BTREE *t;
354 EPGNO *c;
355 {
356 PAGE *h;
357 int status;
358
359 CLR(t, B_DELCRSR); /* Don't try twice. */
360 if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL)
361 return (RET_ERROR);
362 status = __bt_dleaf(t, h, c->index);
363 mpool_put(t->bt_mp, h, MPOOL_DIRTY);
364 return (status);
365 }
366