1 1.20 christos /* $NetBSD: bt_seq.c,v 1.20 2016/09/24 21:31:25 christos Exp $ */ 2 1.6 cgd 3 1.1 cgd /*- 4 1.7 cgd * Copyright (c) 1990, 1993, 1994 5 1.1 cgd * The Regents of the University of California. All rights reserved. 6 1.1 cgd * 7 1.1 cgd * This code is derived from software contributed to Berkeley by 8 1.1 cgd * Mike Olson. 9 1.1 cgd * 10 1.1 cgd * Redistribution and use in source and binary forms, with or without 11 1.1 cgd * modification, are permitted provided that the following conditions 12 1.1 cgd * are met: 13 1.1 cgd * 1. Redistributions of source code must retain the above copyright 14 1.1 cgd * notice, this list of conditions and the following disclaimer. 15 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 cgd * notice, this list of conditions and the following disclaimer in the 17 1.1 cgd * documentation and/or other materials provided with the distribution. 18 1.13 agc * 3. Neither the name of the University nor the names of its contributors 19 1.1 cgd * may be used to endorse or promote products derived from this software 20 1.1 cgd * without specific prior written permission. 21 1.1 cgd * 22 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 1.1 cgd * SUCH DAMAGE. 33 1.1 cgd */ 34 1.1 cgd 35 1.17 joerg #if HAVE_NBTOOL_CONFIG_H 36 1.17 joerg #include "nbtool_config.h" 37 1.17 joerg #endif 38 1.17 joerg 39 1.9 christos #include <sys/cdefs.h> 40 1.20 christos __RCSID("$NetBSD: bt_seq.c,v 1.20 2016/09/24 21:31:25 christos Exp $"); 41 1.1 cgd 42 1.10 jtc #include "namespace.h" 43 1.1 cgd #include <sys/types.h> 44 1.1 cgd 45 1.15 christos #include <assert.h> 46 1.1 cgd #include <errno.h> 47 1.1 cgd #include <stddef.h> 48 1.1 cgd #include <stdio.h> 49 1.1 cgd #include <stdlib.h> 50 1.1 cgd 51 1.1 cgd #include <db.h> 52 1.1 cgd #include "btree.h" 53 1.1 cgd 54 1.15 christos static int __bt_first(BTREE *, const DBT *, EPG *, int *); 55 1.15 christos static int __bt_seqadv(BTREE *, EPG *, int); 56 1.15 christos static int __bt_seqset(BTREE *, EPG *, DBT *, int); 57 1.19 christos static int __bt_rseq_next(BTREE *, EPG *); 58 1.19 christos static int __bt_rseq_prev(BTREE *, EPG *); 59 1.1 cgd 60 1.1 cgd /* 61 1.1 cgd * Sequential scan support. 62 1.1 cgd * 63 1.7 cgd * The tree can be scanned sequentially, starting from either end of the 64 1.7 cgd * tree or from any specific key. A scan request before any scanning is 65 1.7 cgd * done is initialized as starting from the least node. 66 1.1 cgd */ 67 1.1 cgd 68 1.1 cgd /* 69 1.7 cgd * __bt_seq -- 70 1.7 cgd * Btree sequential scan interface. 71 1.1 cgd * 72 1.1 cgd * Parameters: 73 1.1 cgd * dbp: pointer to access method 74 1.1 cgd * key: key for positioning and return value 75 1.1 cgd * data: data return value 76 1.19 christos * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV, R_RNEXT, R_RPREV. 77 1.1 cgd * 78 1.1 cgd * Returns: 79 1.1 cgd * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 80 1.1 cgd */ 81 1.1 cgd int 82 1.15 christos __bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags) 83 1.1 cgd { 84 1.1 cgd BTREE *t; 85 1.1 cgd EPG e; 86 1.1 cgd int status; 87 1.1 cgd 88 1.4 cgd t = dbp->internal; 89 1.4 cgd 90 1.4 cgd /* Toss any page pinned across calls. */ 91 1.4 cgd if (t->bt_pinned != NULL) { 92 1.4 cgd mpool_put(t->bt_mp, t->bt_pinned, 0); 93 1.4 cgd t->bt_pinned = NULL; 94 1.4 cgd } 95 1.4 cgd 96 1.1 cgd /* 97 1.18 ryoon * If scan uninitialized as yet, or starting at a specific record, set 98 1.7 cgd * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin 99 1.7 cgd * the page the cursor references if they're successful. 100 1.1 cgd */ 101 1.7 cgd switch (flags) { 102 1.1 cgd case R_NEXT: 103 1.1 cgd case R_PREV: 104 1.19 christos case R_RNEXT: 105 1.19 christos case R_RPREV: 106 1.7 cgd if (F_ISSET(&t->bt_cursor, CURS_INIT)) { 107 1.11 christos status = __bt_seqadv(t, &e, (int)flags); 108 1.1 cgd break; 109 1.1 cgd } 110 1.1 cgd /* FALLTHROUGH */ 111 1.1 cgd case R_FIRST: 112 1.1 cgd case R_LAST: 113 1.7 cgd case R_CURSOR: 114 1.11 christos status = __bt_seqset(t, &e, key, (int)flags); 115 1.1 cgd break; 116 1.1 cgd default: 117 1.1 cgd errno = EINVAL; 118 1.1 cgd return (RET_ERROR); 119 1.1 cgd } 120 1.1 cgd 121 1.1 cgd if (status == RET_SUCCESS) { 122 1.11 christos __bt_setcur(t, e.page->pgno, (u_int)e.index); 123 1.1 cgd 124 1.7 cgd status = 125 1.7 cgd __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0); 126 1.4 cgd 127 1.4 cgd /* 128 1.4 cgd * If the user is doing concurrent access, we copied the 129 1.4 cgd * key/data, toss the page. 130 1.4 cgd */ 131 1.7 cgd if (F_ISSET(t, B_DB_LOCK)) 132 1.4 cgd mpool_put(t->bt_mp, e.page, 0); 133 1.4 cgd else 134 1.4 cgd t->bt_pinned = e.page; 135 1.1 cgd } 136 1.1 cgd return (status); 137 1.1 cgd } 138 1.1 cgd 139 1.1 cgd /* 140 1.7 cgd * __bt_seqset -- 141 1.7 cgd * Set the sequential scan to a specific key. 142 1.1 cgd * 143 1.1 cgd * Parameters: 144 1.1 cgd * t: tree 145 1.1 cgd * ep: storage for returned key 146 1.1 cgd * key: key for initial scan position 147 1.19 christos * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV, R_RNEXT, R_RPREV. 148 1.1 cgd * 149 1.1 cgd * Side effects: 150 1.1 cgd * Pins the page the cursor references. 151 1.1 cgd * 152 1.1 cgd * Returns: 153 1.1 cgd * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 154 1.1 cgd */ 155 1.1 cgd static int 156 1.15 christos __bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags) 157 1.1 cgd { 158 1.1 cgd PAGE *h; 159 1.1 cgd pgno_t pg; 160 1.1 cgd int exact; 161 1.1 cgd 162 1.1 cgd /* 163 1.7 cgd * Find the first, last or specific key in the tree and point the 164 1.7 cgd * cursor at it. The cursor may not be moved until a new key has 165 1.7 cgd * been found. 166 1.1 cgd */ 167 1.7 cgd switch (flags) { 168 1.1 cgd case R_CURSOR: /* Keyed scan. */ 169 1.1 cgd /* 170 1.7 cgd * Find the first instance of the key or the smallest key 171 1.7 cgd * which is greater than or equal to the specified key. 172 1.1 cgd */ 173 1.1 cgd if (key->data == NULL || key->size == 0) { 174 1.1 cgd errno = EINVAL; 175 1.1 cgd return (RET_ERROR); 176 1.1 cgd } 177 1.7 cgd return (__bt_first(t, key, ep, &exact)); 178 1.1 cgd case R_FIRST: /* First record. */ 179 1.1 cgd case R_NEXT: 180 1.19 christos case R_RNEXT: 181 1.19 christos BT_CLR(t); 182 1.1 cgd /* Walk down the left-hand side of the tree. */ 183 1.1 cgd for (pg = P_ROOT;;) { 184 1.20 christos if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 185 1.1 cgd return (RET_ERROR); 186 1.7 cgd 187 1.7 cgd /* Check for an empty tree. */ 188 1.7 cgd if (NEXTINDEX(h) == 0) { 189 1.7 cgd mpool_put(t->bt_mp, h, 0); 190 1.7 cgd return (RET_SPECIAL); 191 1.7 cgd } 192 1.7 cgd 193 1.1 cgd if (h->flags & (P_BLEAF | P_RLEAF)) 194 1.1 cgd break; 195 1.1 cgd pg = GETBINTERNAL(h, 0)->pgno; 196 1.19 christos BT_PUSH(t, h->pgno, 0); 197 1.1 cgd mpool_put(t->bt_mp, h, 0); 198 1.1 cgd } 199 1.1 cgd ep->page = h; 200 1.1 cgd ep->index = 0; 201 1.1 cgd break; 202 1.1 cgd case R_LAST: /* Last record. */ 203 1.1 cgd case R_PREV: 204 1.19 christos case R_RPREV: 205 1.19 christos BT_CLR(t); 206 1.1 cgd /* Walk down the right-hand side of the tree. */ 207 1.1 cgd for (pg = P_ROOT;;) { 208 1.20 christos if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 209 1.1 cgd return (RET_ERROR); 210 1.7 cgd 211 1.7 cgd /* Check for an empty tree. */ 212 1.7 cgd if (NEXTINDEX(h) == 0) { 213 1.7 cgd mpool_put(t->bt_mp, h, 0); 214 1.7 cgd return (RET_SPECIAL); 215 1.7 cgd } 216 1.7 cgd 217 1.1 cgd if (h->flags & (P_BLEAF | P_RLEAF)) 218 1.1 cgd break; 219 1.1 cgd pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; 220 1.19 christos BT_PUSH(t, h->pgno, NEXTINDEX(h) - 1); 221 1.1 cgd mpool_put(t->bt_mp, h, 0); 222 1.1 cgd } 223 1.1 cgd 224 1.1 cgd ep->page = h; 225 1.1 cgd ep->index = NEXTINDEX(h) - 1; 226 1.1 cgd break; 227 1.1 cgd } 228 1.1 cgd return (RET_SUCCESS); 229 1.1 cgd } 230 1.1 cgd 231 1.1 cgd /* 232 1.7 cgd * __bt_seqadvance -- 233 1.7 cgd * Advance the sequential scan. 234 1.1 cgd * 235 1.1 cgd * Parameters: 236 1.1 cgd * t: tree 237 1.19 christos * flags: R_NEXT, R_PREV, R_RNEXT, R_RPREV 238 1.1 cgd * 239 1.1 cgd * Side effects: 240 1.1 cgd * Pins the page the new key/data record is on. 241 1.1 cgd * 242 1.1 cgd * Returns: 243 1.1 cgd * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. 244 1.1 cgd */ 245 1.1 cgd static int 246 1.15 christos __bt_seqadv(BTREE *t, EPG *ep, int flags) 247 1.1 cgd { 248 1.7 cgd CURSOR *c; 249 1.1 cgd PAGE *h; 250 1.12 thorpej indx_t idx = 0; /* pacify gcc */ 251 1.1 cgd pgno_t pg; 252 1.19 christos int exact, rval; 253 1.7 cgd 254 1.7 cgd /* 255 1.7 cgd * There are a couple of states that we can be in. The cursor has 256 1.7 cgd * been initialized by the time we get here, but that's all we know. 257 1.7 cgd */ 258 1.7 cgd c = &t->bt_cursor; 259 1.1 cgd 260 1.7 cgd /* 261 1.19 christos * The cursor was deleted and there weren't any duplicate records, 262 1.19 christos * so the cursor's key was saved. Find out where that key would 263 1.19 christos * be in the current tree. If the returned key is an exact match, 264 1.19 christos * it means that a key/data pair was inserted into the tree after 265 1.19 christos * the delete. We could reasonably return the key, but the problem 266 1.19 christos * is that this is the access pattern we'll see if the user is 267 1.19 christos * doing seq(..., R_NEXT)/put(..., 0) pairs, i.e. the put deletes 268 1.19 christos * the cursor record and then replaces it, so the cursor was saved, 269 1.19 christos * and we'll simply return the same "new" record until the user 270 1.19 christos * notices and doesn't do a put() of it. Since the key is an exact 271 1.19 christos * match, we could as easily put the new record before the cursor, 272 1.19 christos * and we've made no guarantee to return it. So, move forward or 273 1.19 christos * back a record if it's an exact match. 274 1.19 christos * 275 1.19 christos * XXX 276 1.19 christos * In the current implementation, put's to the cursor are done with 277 1.19 christos * delete/add pairs. This has two consequences. First, it means 278 1.19 christos * that seq(..., R_NEXT)/put(..., R_CURSOR) pairs are going to exhibit 279 1.19 christos * the same behavior as above. Second, you can return the same key 280 1.19 christos * twice if you have duplicate records. The scenario is that the 281 1.19 christos * cursor record is deleted, moving the cursor forward or backward 282 1.19 christos * to a duplicate. The add then inserts the new record at a location 283 1.19 christos * ahead of the cursor because duplicates aren't sorted in any way, 284 1.19 christos * and the new record is later returned. This has to be fixed at some 285 1.19 christos * point. 286 1.7 cgd */ 287 1.19 christos if (F_ISSET(c, CURS_ACQUIRE)) { 288 1.19 christos if ((rval = __bt_first(t, &c->key, ep, &exact)) == RET_ERROR) 289 1.19 christos return RET_ERROR; 290 1.19 christos if (!exact) 291 1.19 christos return rval; 292 1.19 christos /* 293 1.19 christos * XXX 294 1.19 christos * Kluge -- get, release, get the page. 295 1.19 christos */ 296 1.19 christos c->pg.pgno = ep->page->pgno; 297 1.19 christos c->pg.index = ep->index; 298 1.19 christos mpool_put(t->bt_mp, ep->page, 0); 299 1.19 christos } 300 1.1 cgd 301 1.7 cgd /* Get the page referenced by the cursor. */ 302 1.20 christos if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL) 303 1.1 cgd return (RET_ERROR); 304 1.1 cgd 305 1.1 cgd /* 306 1.7 cgd * Find the next/previous record in the tree and point the cursor at 307 1.7 cgd * it. The cursor may not be moved until a new key has been found. 308 1.1 cgd */ 309 1.7 cgd switch (flags) { 310 1.1 cgd case R_NEXT: /* Next record. */ 311 1.19 christos case R_RNEXT: 312 1.7 cgd /* 313 1.7 cgd * The cursor was deleted in duplicate records, and moved 314 1.7 cgd * forward to a record that has yet to be returned. Clear 315 1.7 cgd * that flag, and return the record. 316 1.7 cgd */ 317 1.7 cgd if (F_ISSET(c, CURS_AFTER)) 318 1.7 cgd goto usecurrent; 319 1.12 thorpej idx = c->pg.index; 320 1.12 thorpej if (++idx == NEXTINDEX(h)) { 321 1.19 christos if (flags == R_RNEXT) { 322 1.19 christos ep->page = h; 323 1.19 christos ep->index = idx; 324 1.19 christos return __bt_rseq_next(t, ep); 325 1.19 christos } 326 1.7 cgd pg = h->nextpg; 327 1.7 cgd mpool_put(t->bt_mp, h, 0); 328 1.7 cgd if (pg == P_INVALID) 329 1.19 christos return RET_SPECIAL; 330 1.20 christos if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 331 1.19 christos return RET_ERROR; 332 1.12 thorpej idx = 0; 333 1.1 cgd } 334 1.1 cgd break; 335 1.1 cgd case R_PREV: /* Previous record. */ 336 1.19 christos case R_RPREV: 337 1.7 cgd /* 338 1.7 cgd * The cursor was deleted in duplicate records, and moved 339 1.7 cgd * backward to a record that has yet to be returned. Clear 340 1.7 cgd * that flag, and return the record. 341 1.7 cgd */ 342 1.7 cgd if (F_ISSET(c, CURS_BEFORE)) { 343 1.7 cgd usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE); 344 1.7 cgd ep->page = h; 345 1.7 cgd ep->index = c->pg.index; 346 1.7 cgd return (RET_SUCCESS); 347 1.7 cgd } 348 1.12 thorpej idx = c->pg.index; 349 1.12 thorpej if (idx == 0) { 350 1.19 christos if (flags == R_RPREV) { 351 1.19 christos ep->page = h; 352 1.19 christos ep->index = idx; 353 1.19 christos return __bt_rseq_prev(t, ep); 354 1.19 christos } 355 1.7 cgd pg = h->prevpg; 356 1.7 cgd mpool_put(t->bt_mp, h, 0); 357 1.7 cgd if (pg == P_INVALID) 358 1.19 christos return RET_SPECIAL; 359 1.20 christos if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 360 1.19 christos return RET_ERROR; 361 1.12 thorpej idx = NEXTINDEX(h) - 1; 362 1.7 cgd } else 363 1.12 thorpej --idx; 364 1.1 cgd break; 365 1.1 cgd } 366 1.1 cgd 367 1.7 cgd ep->page = h; 368 1.12 thorpej ep->index = idx; 369 1.7 cgd return (RET_SUCCESS); 370 1.7 cgd } 371 1.19 christos /* 372 1.19 christos * Get the first item on the next page, but by going up and down the tree. 373 1.19 christos */ 374 1.19 christos static int 375 1.19 christos __bt_rseq_next(BTREE *t, EPG *ep) 376 1.19 christos { 377 1.19 christos PAGE *h; 378 1.19 christos indx_t idx; 379 1.19 christos EPGNO *up; 380 1.19 christos pgno_t pg; 381 1.19 christos 382 1.19 christos h = ep->page; 383 1.19 christos idx = ep->index; 384 1.19 christos do { 385 1.19 christos /* Move up the tree. */ 386 1.19 christos up = BT_POP(t); 387 1.19 christos mpool_put(t->bt_mp, h, 0); 388 1.19 christos /* Did we hit the right edge of the root? */ 389 1.19 christos if (up == NULL) 390 1.19 christos return RET_SPECIAL; 391 1.20 christos if ((h = mpool_get(t->bt_mp, up->pgno, 0)) == NULL) 392 1.19 christos return RET_ERROR; 393 1.19 christos idx = up->index; 394 1.19 christos } while (++idx == NEXTINDEX(h)); 395 1.19 christos 396 1.19 christos while (!(h->flags & (P_BLEAF | P_RLEAF))) { 397 1.19 christos /* Move back down the tree. */ 398 1.19 christos BT_PUSH(t, h->pgno, idx); 399 1.19 christos pg = GETBINTERNAL(h, idx)->pgno; 400 1.19 christos mpool_put(t->bt_mp, h, 0); 401 1.20 christos if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 402 1.19 christos return RET_ERROR; 403 1.19 christos idx = 0; 404 1.19 christos } 405 1.19 christos ep->page = h; 406 1.19 christos ep->index = idx; 407 1.19 christos return RET_SUCCESS; 408 1.19 christos } 409 1.19 christos 410 1.19 christos /* 411 1.19 christos * Get the last item on the previous page, but by going up and down the tree. 412 1.19 christos */ 413 1.19 christos static int 414 1.19 christos __bt_rseq_prev(BTREE *t, EPG *ep) 415 1.19 christos { 416 1.19 christos PAGE *h; 417 1.19 christos indx_t idx; 418 1.19 christos EPGNO *up; 419 1.19 christos pgno_t pg; 420 1.19 christos 421 1.19 christos h = ep->page; 422 1.19 christos idx = ep->index; 423 1.19 christos do { 424 1.19 christos /* Move up the tree. */ 425 1.19 christos up = BT_POP(t); 426 1.19 christos mpool_put(t->bt_mp, h, 0); 427 1.19 christos /* Did we hit the left edge of the root? */ 428 1.19 christos if (up == NULL) 429 1.19 christos return RET_SPECIAL; 430 1.20 christos if ((h = mpool_get(t->bt_mp, up->pgno, 0)) == NULL) 431 1.19 christos return RET_ERROR; 432 1.19 christos idx = up->index; 433 1.19 christos } while (idx == 0); 434 1.19 christos --idx; 435 1.19 christos while (!(h->flags & (P_BLEAF | P_RLEAF))) { 436 1.19 christos /* Move back down the tree. */ 437 1.19 christos BT_PUSH(t, h->pgno, idx); 438 1.19 christos pg = GETBINTERNAL(h, idx)->pgno; 439 1.19 christos mpool_put(t->bt_mp, h, 0); 440 1.20 christos if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 441 1.19 christos return RET_ERROR; 442 1.19 christos idx = NEXTINDEX(h) - 1; 443 1.19 christos } 444 1.19 christos ep->page = h; 445 1.19 christos ep->index = idx; 446 1.19 christos return RET_SUCCESS; 447 1.19 christos } 448 1.7 cgd 449 1.7 cgd /* 450 1.7 cgd * __bt_first -- 451 1.7 cgd * Find the first entry. 452 1.7 cgd * 453 1.7 cgd * Parameters: 454 1.7 cgd * t: the tree 455 1.7 cgd * key: the key 456 1.7 cgd * erval: return EPG 457 1.7 cgd * exactp: pointer to exact match flag 458 1.7 cgd * 459 1.7 cgd * Returns: 460 1.7 cgd * The first entry in the tree greater than or equal to key, 461 1.7 cgd * or RET_SPECIAL if no such key exists. 462 1.7 cgd */ 463 1.7 cgd static int 464 1.15 christos __bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp) 465 1.7 cgd { 466 1.19 christos PAGE *h, *hprev; 467 1.7 cgd EPG *ep, save; 468 1.7 cgd pgno_t pg; 469 1.1 cgd 470 1.1 cgd /* 471 1.7 cgd * Find any matching record; __bt_search pins the page. 472 1.7 cgd * 473 1.7 cgd * If it's an exact match and duplicates are possible, walk backwards 474 1.7 cgd * in the tree until we find the first one. Otherwise, make sure it's 475 1.7 cgd * a valid key (__bt_search may return an index just past the end of a 476 1.7 cgd * page) and return it. 477 1.1 cgd */ 478 1.7 cgd if ((ep = __bt_search(t, key, exactp)) == NULL) 479 1.19 christos return RET_SPECIAL; 480 1.7 cgd if (*exactp) { 481 1.7 cgd if (F_ISSET(t, B_NODUPS)) { 482 1.7 cgd *erval = *ep; 483 1.7 cgd return (RET_SUCCESS); 484 1.7 cgd } 485 1.19 christos 486 1.7 cgd /* 487 1.7 cgd * Walk backwards, as long as the entry matches and there are 488 1.7 cgd * keys left in the tree. Save a copy of each match in case 489 1.7 cgd * we go too far. 490 1.7 cgd */ 491 1.7 cgd save = *ep; 492 1.7 cgd h = ep->page; 493 1.7 cgd do { 494 1.7 cgd if (save.page->pgno != ep->page->pgno) { 495 1.7 cgd mpool_put(t->bt_mp, save.page, 0); 496 1.7 cgd save = *ep; 497 1.7 cgd } else 498 1.7 cgd save.index = ep->index; 499 1.7 cgd 500 1.7 cgd /* 501 1.7 cgd * Don't unpin the page the last (or original) match 502 1.7 cgd * was on, but make sure it's unpinned if an error 503 1.7 cgd * occurs. 504 1.7 cgd */ 505 1.7 cgd if (ep->index == 0) { 506 1.7 cgd if (h->prevpg == P_INVALID) 507 1.7 cgd break; 508 1.7 cgd if (h->pgno != save.page->pgno) 509 1.7 cgd mpool_put(t->bt_mp, h, 0); 510 1.20 christos if ((hprev = mpool_get(t->bt_mp, 511 1.19 christos h->prevpg, 0)) == NULL) { 512 1.19 christos if (h->pgno == save.page->pgno) 513 1.19 christos mpool_put(t->bt_mp, 514 1.19 christos save.page, 0); 515 1.19 christos return RET_ERROR; 516 1.19 christos } 517 1.19 christos ep->page = h = hprev; 518 1.7 cgd ep->index = NEXTINDEX(h); 519 1.7 cgd } 520 1.7 cgd --ep->index; 521 1.7 cgd } while (__bt_cmp(t, key, ep) == 0); 522 1.7 cgd 523 1.7 cgd /* 524 1.7 cgd * Reach here with the last page that was looked at pinned, 525 1.7 cgd * which may or may not be the same as the last (or original) 526 1.7 cgd * match page. If it's not useful, release it. 527 1.7 cgd */ 528 1.7 cgd if (h->pgno != save.page->pgno) 529 1.7 cgd mpool_put(t->bt_mp, h, 0); 530 1.7 cgd 531 1.7 cgd *erval = save; 532 1.7 cgd return (RET_SUCCESS); 533 1.7 cgd } 534 1.7 cgd 535 1.7 cgd /* If at the end of a page, find the next entry. */ 536 1.7 cgd if (ep->index == NEXTINDEX(ep->page)) { 537 1.7 cgd h = ep->page; 538 1.7 cgd pg = h->nextpg; 539 1.7 cgd mpool_put(t->bt_mp, h, 0); 540 1.7 cgd if (pg == P_INVALID) 541 1.7 cgd return (RET_SPECIAL); 542 1.20 christos if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 543 1.1 cgd return (RET_ERROR); 544 1.7 cgd ep->index = 0; 545 1.7 cgd ep->page = h; 546 1.1 cgd } 547 1.7 cgd *erval = *ep; 548 1.1 cgd return (RET_SUCCESS); 549 1.1 cgd } 550 1.1 cgd 551 1.1 cgd /* 552 1.7 cgd * __bt_setcur -- 553 1.7 cgd * Set the cursor to an entry in the tree. 554 1.1 cgd * 555 1.1 cgd * Parameters: 556 1.7 cgd * t: the tree 557 1.7 cgd * pgno: page number 558 1.12 thorpej * idx: page index 559 1.1 cgd */ 560 1.7 cgd void 561 1.15 christos __bt_setcur(BTREE *t, pgno_t pgno, u_int idx) 562 1.1 cgd { 563 1.7 cgd /* Lose any already deleted key. */ 564 1.7 cgd if (t->bt_cursor.key.data != NULL) { 565 1.7 cgd free(t->bt_cursor.key.data); 566 1.7 cgd t->bt_cursor.key.size = 0; 567 1.7 cgd t->bt_cursor.key.data = NULL; 568 1.7 cgd } 569 1.7 cgd F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE); 570 1.1 cgd 571 1.7 cgd /* Update the cursor. */ 572 1.7 cgd t->bt_cursor.pg.pgno = pgno; 573 1.12 thorpej t->bt_cursor.pg.index = idx; 574 1.7 cgd F_SET(&t->bt_cursor, CURS_INIT); 575 1.1 cgd } 576