bt_seq.c revision 1.8 1 /* $NetBSD: bt_seq.c,v 1.8 1997/05/17 19:28:11 pk Exp $ */
2
3 /*-
4 * Copyright (c) 1990, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Mike Olson.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #if defined(LIBC_SCCS) && !defined(lint)
40 #if 0
41 static char sccsid[] = "@(#)bt_seq.c 8.7 (Berkeley) 7/20/94";
42 #else
43 static char rcsid[] = "$NetBSD: bt_seq.c,v 1.8 1997/05/17 19:28:11 pk Exp $";
44 #endif
45 #endif /* LIBC_SCCS and not lint */
46
47 #include <sys/types.h>
48
49 #include <errno.h>
50 #include <stddef.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53
54 #include <db.h>
55 #include "btree.h"
56
57 static int __bt_first __P((BTREE *, const DBT *, EPG *, int *));
58 static int __bt_seqadv __P((BTREE *, EPG *, int));
59 static int __bt_seqset __P((BTREE *, EPG *, DBT *, int));
60
61 /*
62 * Sequential scan support.
63 *
64 * The tree can be scanned sequentially, starting from either end of the
65 * tree or from any specific key. A scan request before any scanning is
66 * done is initialized as starting from the least node.
67 */
68
69 /*
70 * __bt_seq --
71 * Btree sequential scan interface.
72 *
73 * Parameters:
74 * dbp: pointer to access method
75 * key: key for positioning and return value
76 * data: data return value
77 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
78 *
79 * Returns:
80 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
81 */
82 int
83 __bt_seq(dbp, key, data, flags)
84 const DB *dbp;
85 DBT *key, *data;
86 u_int flags;
87 {
88 BTREE *t;
89 EPG e;
90 int status;
91
92 t = dbp->internal;
93
94 /* Toss any page pinned across calls. */
95 if (t->bt_pinned != NULL) {
96 mpool_put(t->bt_mp, t->bt_pinned, 0);
97 t->bt_pinned = NULL;
98 }
99
100 /*
101 * If scan unitialized as yet, or starting at a specific record, set
102 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin
103 * the page the cursor references if they're successful.
104 */
105 switch (flags) {
106 case R_NEXT:
107 case R_PREV:
108 if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
109 status = __bt_seqadv(t, &e, flags);
110 break;
111 }
112 /* FALLTHROUGH */
113 case R_FIRST:
114 case R_LAST:
115 case R_CURSOR:
116 status = __bt_seqset(t, &e, key, flags);
117 break;
118 default:
119 errno = EINVAL;
120 return (RET_ERROR);
121 }
122
123 if (status == RET_SUCCESS) {
124 __bt_setcur(t, e.page->pgno, e.index);
125
126 status =
127 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);
128
129 /*
130 * If the user is doing concurrent access, we copied the
131 * key/data, toss the page.
132 */
133 if (F_ISSET(t, B_DB_LOCK))
134 mpool_put(t->bt_mp, e.page, 0);
135 else
136 t->bt_pinned = e.page;
137 }
138 return (status);
139 }
140
141 /*
142 * __bt_seqset --
143 * Set the sequential scan to a specific key.
144 *
145 * Parameters:
146 * t: tree
147 * ep: storage for returned key
148 * key: key for initial scan position
149 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
150 *
151 * Side effects:
152 * Pins the page the cursor references.
153 *
154 * Returns:
155 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
156 */
157 static int
158 __bt_seqset(t, ep, key, flags)
159 BTREE *t;
160 EPG *ep;
161 DBT *key;
162 int flags;
163 {
164 PAGE *h;
165 pgno_t pg;
166 int exact;
167
168 /*
169 * Find the first, last or specific key in the tree and point the
170 * cursor at it. The cursor may not be moved until a new key has
171 * been found.
172 */
173 switch (flags) {
174 case R_CURSOR: /* Keyed scan. */
175 /*
176 * Find the first instance of the key or the smallest key
177 * which is greater than or equal to the specified key.
178 */
179 if (key->data == NULL || key->size == 0) {
180 errno = EINVAL;
181 return (RET_ERROR);
182 }
183 return (__bt_first(t, key, ep, &exact));
184 case R_FIRST: /* First record. */
185 case R_NEXT:
186 /* Walk down the left-hand side of the tree. */
187 for (pg = P_ROOT;;) {
188 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
189 return (RET_ERROR);
190
191 /* Check for an empty tree. */
192 if (NEXTINDEX(h) == 0) {
193 mpool_put(t->bt_mp, h, 0);
194 return (RET_SPECIAL);
195 }
196
197 if (h->flags & (P_BLEAF | P_RLEAF))
198 break;
199 pg = GETBINTERNAL(h, 0)->pgno;
200 mpool_put(t->bt_mp, h, 0);
201 }
202 ep->page = h;
203 ep->index = 0;
204 break;
205 case R_LAST: /* Last record. */
206 case R_PREV:
207 /* Walk down the right-hand side of the tree. */
208 for (pg = P_ROOT;;) {
209 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
210 return (RET_ERROR);
211
212 /* Check for an empty tree. */
213 if (NEXTINDEX(h) == 0) {
214 mpool_put(t->bt_mp, h, 0);
215 return (RET_SPECIAL);
216 }
217
218 if (h->flags & (P_BLEAF | P_RLEAF))
219 break;
220 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
221 mpool_put(t->bt_mp, h, 0);
222 }
223
224 ep->page = h;
225 ep->index = NEXTINDEX(h) - 1;
226 break;
227 }
228 return (RET_SUCCESS);
229 }
230
231 /*
232 * __bt_seqadvance --
233 * Advance the sequential scan.
234 *
235 * Parameters:
236 * t: tree
237 * flags: R_NEXT, R_PREV
238 *
239 * Side effects:
240 * Pins the page the new key/data record is on.
241 *
242 * Returns:
243 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
244 */
245 static int
246 __bt_seqadv(t, ep, flags)
247 BTREE *t;
248 EPG *ep;
249 int flags;
250 {
251 CURSOR *c;
252 PAGE *h;
253 indx_t index;
254 pgno_t pg;
255 int exact;
256
257 /*
258 * There are a couple of states that we can be in. The cursor has
259 * been initialized by the time we get here, but that's all we know.
260 */
261 c = &t->bt_cursor;
262
263 /*
264 * The cursor was deleted where there weren't any duplicate records,
265 * so the key was saved. Find out where that key would go in the
266 * current tree. It doesn't matter if the returned key is an exact
267 * match or not -- if it's an exact match, the record was added after
268 * the delete so we can just return it. If not, as long as there's
269 * a record there, return it.
270 */
271 if (F_ISSET(c, CURS_ACQUIRE))
272 return (__bt_first(t, &c->key, ep, &exact));
273
274 /* Get the page referenced by the cursor. */
275 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
276 return (RET_ERROR);
277
278 /*
279 * Find the next/previous record in the tree and point the cursor at
280 * it. The cursor may not be moved until a new key has been found.
281 */
282 switch (flags) {
283 case R_NEXT: /* Next record. */
284 /*
285 * The cursor was deleted in duplicate records, and moved
286 * forward to a record that has yet to be returned. Clear
287 * that flag, and return the record.
288 */
289 if (F_ISSET(c, CURS_AFTER))
290 goto usecurrent;
291 index = c->pg.index;
292 if (++index == NEXTINDEX(h)) {
293 pg = h->nextpg;
294 mpool_put(t->bt_mp, h, 0);
295 if (pg == P_INVALID)
296 return (RET_SPECIAL);
297 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
298 return (RET_ERROR);
299 index = 0;
300 }
301 break;
302 case R_PREV: /* Previous record. */
303 /*
304 * The cursor was deleted in duplicate records, and moved
305 * backward to a record that has yet to be returned. Clear
306 * that flag, and return the record.
307 */
308 if (F_ISSET(c, CURS_BEFORE)) {
309 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE);
310 ep->page = h;
311 ep->index = c->pg.index;
312 return (RET_SUCCESS);
313 }
314 index = c->pg.index;
315 if (index == 0) {
316 pg = h->prevpg;
317 mpool_put(t->bt_mp, h, 0);
318 if (pg == P_INVALID)
319 return (RET_SPECIAL);
320 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
321 return (RET_ERROR);
322 index = NEXTINDEX(h) - 1;
323 } else
324 --index;
325 break;
326 }
327
328 ep->page = h;
329 ep->index = index;
330 return (RET_SUCCESS);
331 }
332
333 /*
334 * __bt_first --
335 * Find the first entry.
336 *
337 * Parameters:
338 * t: the tree
339 * key: the key
340 * erval: return EPG
341 * exactp: pointer to exact match flag
342 *
343 * Returns:
344 * The first entry in the tree greater than or equal to key,
345 * or RET_SPECIAL if no such key exists.
346 */
347 static int
348 __bt_first(t, key, erval, exactp)
349 BTREE *t;
350 const DBT *key;
351 EPG *erval;
352 int *exactp;
353 {
354 PAGE *h;
355 EPG *ep, save;
356 pgno_t pg;
357
358 /*
359 * Find any matching record; __bt_search pins the page.
360 *
361 * If it's an exact match and duplicates are possible, walk backwards
362 * in the tree until we find the first one. Otherwise, make sure it's
363 * a valid key (__bt_search may return an index just past the end of a
364 * page) and return it.
365 */
366 if ((ep = __bt_search(t, key, exactp)) == NULL)
367 return (0);
368 if (*exactp) {
369 if (F_ISSET(t, B_NODUPS)) {
370 *erval = *ep;
371 return (RET_SUCCESS);
372 }
373
374 /*
375 * Walk backwards, as long as the entry matches and there are
376 * keys left in the tree. Save a copy of each match in case
377 * we go too far.
378 */
379 save = *ep;
380 h = ep->page;
381 do {
382 if (save.page->pgno != ep->page->pgno) {
383 mpool_put(t->bt_mp, save.page, 0);
384 save = *ep;
385 } else
386 save.index = ep->index;
387
388 /*
389 * Don't unpin the page the last (or original) match
390 * was on, but make sure it's unpinned if an error
391 * occurs.
392 */
393 if (ep->index == 0) {
394 if (h->prevpg == P_INVALID)
395 break;
396 if (h->pgno != save.page->pgno)
397 mpool_put(t->bt_mp, h, 0);
398 if ((h = mpool_get(t->bt_mp,
399 h->prevpg, 0)) == NULL) {
400 if (h->pgno == save.page->pgno)
401 mpool_put(t->bt_mp,
402 save.page, 0);
403 return (RET_ERROR);
404 }
405 ep->page = h;
406 ep->index = NEXTINDEX(h);
407 }
408 --ep->index;
409 } while (__bt_cmp(t, key, ep) == 0);
410
411 /*
412 * Reach here with the last page that was looked at pinned,
413 * which may or may not be the same as the last (or original)
414 * match page. If it's not useful, release it.
415 */
416 if (h->pgno != save.page->pgno)
417 mpool_put(t->bt_mp, h, 0);
418
419 *erval = save;
420 return (RET_SUCCESS);
421 }
422
423 /* If at the end of a page, find the next entry. */
424 if (ep->index == NEXTINDEX(ep->page)) {
425 h = ep->page;
426 pg = h->nextpg;
427 mpool_put(t->bt_mp, h, 0);
428 if (pg == P_INVALID)
429 return (RET_SPECIAL);
430 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
431 return (RET_ERROR);
432 ep->index = 0;
433 ep->page = h;
434 }
435 *erval = *ep;
436 return (RET_SUCCESS);
437 }
438
439 /*
440 * __bt_setcur --
441 * Set the cursor to an entry in the tree.
442 *
443 * Parameters:
444 * t: the tree
445 * pgno: page number
446 * index: page index
447 */
448 void
449 __bt_setcur(t, pgno, index)
450 BTREE *t;
451 pgno_t pgno;
452 u_int index;
453 {
454 /* Lose any already deleted key. */
455 if (t->bt_cursor.key.data != NULL) {
456 free(t->bt_cursor.key.data);
457 t->bt_cursor.key.size = 0;
458 t->bt_cursor.key.data = NULL;
459 }
460 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);
461
462 /* Update the cursor. */
463 t->bt_cursor.pg.pgno = pgno;
464 t->bt_cursor.pg.index = index;
465 F_SET(&t->bt_cursor, CURS_INIT);
466 }
467