Home | History | Annotate | Line # | Download | only in btree
bt_overflow.c revision 1.1.1.1
      1      1.1  cgd /*-
      2  1.1.1.1  cgd  * Copyright (c) 1990, 1993, 1994
      3      1.1  cgd  *	The Regents of the University of California.  All rights reserved.
      4      1.1  cgd  *
      5      1.1  cgd  * This code is derived from software contributed to Berkeley by
      6      1.1  cgd  * Mike Olson.
      7      1.1  cgd  *
      8      1.1  cgd  * Redistribution and use in source and binary forms, with or without
      9      1.1  cgd  * modification, are permitted provided that the following conditions
     10      1.1  cgd  * are met:
     11      1.1  cgd  * 1. Redistributions of source code must retain the above copyright
     12      1.1  cgd  *    notice, this list of conditions and the following disclaimer.
     13      1.1  cgd  * 2. Redistributions in binary form must reproduce the above copyright
     14      1.1  cgd  *    notice, this list of conditions and the following disclaimer in the
     15      1.1  cgd  *    documentation and/or other materials provided with the distribution.
     16      1.1  cgd  * 3. All advertising materials mentioning features or use of this software
     17      1.1  cgd  *    must display the following acknowledgement:
     18      1.1  cgd  *	This product includes software developed by the University of
     19      1.1  cgd  *	California, Berkeley and its contributors.
     20      1.1  cgd  * 4. Neither the name of the University nor the names of its contributors
     21      1.1  cgd  *    may be used to endorse or promote products derived from this software
     22      1.1  cgd  *    without specific prior written permission.
     23      1.1  cgd  *
     24      1.1  cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25      1.1  cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26      1.1  cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27      1.1  cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28      1.1  cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29      1.1  cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30      1.1  cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31      1.1  cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32      1.1  cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33      1.1  cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34      1.1  cgd  * SUCH DAMAGE.
     35      1.1  cgd  */
     36      1.1  cgd 
     37      1.1  cgd #if defined(LIBC_SCCS) && !defined(lint)
     38  1.1.1.1  cgd static char sccsid[] = "@(#)bt_overflow.c	8.4 (Berkeley) 6/20/94";
     39      1.1  cgd #endif /* LIBC_SCCS and not lint */
     40      1.1  cgd 
     41      1.1  cgd #include <sys/param.h>
     42      1.1  cgd 
     43      1.1  cgd #include <stdio.h>
     44      1.1  cgd #include <stdlib.h>
     45      1.1  cgd #include <string.h>
     46      1.1  cgd 
     47      1.1  cgd #include <db.h>
     48      1.1  cgd #include "btree.h"
     49      1.1  cgd 
     50      1.1  cgd /*
     51      1.1  cgd  * Big key/data code.
     52      1.1  cgd  *
     53      1.1  cgd  * Big key and data entries are stored on linked lists of pages.  The initial
     54      1.1  cgd  * reference is a byte string stored with the key or data and is the page number
     55      1.1  cgd  * and size.  The actual record is stored in a chain of pages linked by the
     56      1.1  cgd  * nextpg field of the PAGE header.
     57      1.1  cgd  *
     58      1.1  cgd  * The first page of the chain has a special property.  If the record is used
     59      1.1  cgd  * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set
     60      1.1  cgd  * in the header.
     61      1.1  cgd  *
     62      1.1  cgd  * XXX
     63      1.1  cgd  * A single DBT is written to each chain, so a lot of space on the last page
     64      1.1  cgd  * is wasted.  This is a fairly major bug for some data sets.
     65      1.1  cgd  */
     66      1.1  cgd 
     67      1.1  cgd /*
     68      1.1  cgd  * __OVFL_GET -- Get an overflow key/data item.
     69      1.1  cgd  *
     70      1.1  cgd  * Parameters:
     71      1.1  cgd  *	t:	tree
     72  1.1.1.1  cgd  *	p:	pointer to { pgno_t, u_int32_t }
     73      1.1  cgd  *	buf:	storage address
     74      1.1  cgd  *	bufsz:	storage size
     75      1.1  cgd  *
     76      1.1  cgd  * Returns:
     77      1.1  cgd  *	RET_ERROR, RET_SUCCESS
     78      1.1  cgd  */
     79      1.1  cgd int
     80      1.1  cgd __ovfl_get(t, p, ssz, buf, bufsz)
     81      1.1  cgd 	BTREE *t;
     82      1.1  cgd 	void *p;
     83      1.1  cgd 	size_t *ssz;
     84      1.1  cgd 	char **buf;
     85      1.1  cgd 	size_t *bufsz;
     86      1.1  cgd {
     87      1.1  cgd 	PAGE *h;
     88      1.1  cgd 	pgno_t pg;
     89  1.1.1.1  cgd 	size_t nb, plen;
     90  1.1.1.1  cgd 	u_int32_t sz;
     91      1.1  cgd 
     92      1.1  cgd 	memmove(&pg, p, sizeof(pgno_t));
     93  1.1.1.1  cgd 	memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
     94      1.1  cgd 	*ssz = sz;
     95      1.1  cgd 
     96      1.1  cgd #ifdef DEBUG
     97      1.1  cgd 	if (pg == P_INVALID || sz == 0)
     98      1.1  cgd 		abort();
     99      1.1  cgd #endif
    100      1.1  cgd 	/* Make the buffer bigger as necessary. */
    101      1.1  cgd 	if (*bufsz < sz) {
    102  1.1.1.1  cgd 		*buf = (char *)(*buf == NULL ? malloc(sz) : realloc(*buf, sz));
    103  1.1.1.1  cgd 		if (*buf == NULL)
    104      1.1  cgd 			return (RET_ERROR);
    105      1.1  cgd 		*bufsz = sz;
    106      1.1  cgd 	}
    107      1.1  cgd 
    108      1.1  cgd 	/*
    109      1.1  cgd 	 * Step through the linked list of pages, copying the data on each one
    110      1.1  cgd 	 * into the buffer.  Never copy more than the data's length.
    111      1.1  cgd 	 */
    112      1.1  cgd 	plen = t->bt_psize - BTDATAOFF;
    113      1.1  cgd 	for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) {
    114      1.1  cgd 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    115      1.1  cgd 			return (RET_ERROR);
    116      1.1  cgd 
    117      1.1  cgd 		nb = MIN(sz, plen);
    118      1.1  cgd 		memmove(p, (char *)h + BTDATAOFF, nb);
    119      1.1  cgd 		mpool_put(t->bt_mp, h, 0);
    120      1.1  cgd 
    121      1.1  cgd 		if ((sz -= nb) == 0)
    122      1.1  cgd 			break;
    123      1.1  cgd 	}
    124      1.1  cgd 	return (RET_SUCCESS);
    125      1.1  cgd }
    126      1.1  cgd 
    127      1.1  cgd /*
    128      1.1  cgd  * __OVFL_PUT -- Store an overflow key/data item.
    129      1.1  cgd  *
    130      1.1  cgd  * Parameters:
    131      1.1  cgd  *	t:	tree
    132      1.1  cgd  *	data:	DBT to store
    133      1.1  cgd  *	pgno:	storage page number
    134      1.1  cgd  *
    135      1.1  cgd  * Returns:
    136      1.1  cgd  *	RET_ERROR, RET_SUCCESS
    137      1.1  cgd  */
    138      1.1  cgd int
    139      1.1  cgd __ovfl_put(t, dbt, pg)
    140      1.1  cgd 	BTREE *t;
    141      1.1  cgd 	const DBT *dbt;
    142      1.1  cgd 	pgno_t *pg;
    143      1.1  cgd {
    144      1.1  cgd 	PAGE *h, *last;
    145      1.1  cgd 	void *p;
    146      1.1  cgd 	pgno_t npg;
    147  1.1.1.1  cgd 	size_t nb, plen;
    148  1.1.1.1  cgd 	u_int32_t sz;
    149      1.1  cgd 
    150      1.1  cgd 	/*
    151      1.1  cgd 	 * Allocate pages and copy the key/data record into them.  Store the
    152      1.1  cgd 	 * number of the first page in the chain.
    153      1.1  cgd 	 */
    154      1.1  cgd 	plen = t->bt_psize - BTDATAOFF;
    155      1.1  cgd 	for (last = NULL, p = dbt->data, sz = dbt->size;;
    156      1.1  cgd 	    p = (char *)p + plen, last = h) {
    157      1.1  cgd 		if ((h = __bt_new(t, &npg)) == NULL)
    158      1.1  cgd 			return (RET_ERROR);
    159      1.1  cgd 
    160      1.1  cgd 		h->pgno = npg;
    161      1.1  cgd 		h->nextpg = h->prevpg = P_INVALID;
    162      1.1  cgd 		h->flags = P_OVERFLOW;
    163      1.1  cgd 		h->lower = h->upper = 0;
    164      1.1  cgd 
    165      1.1  cgd 		nb = MIN(sz, plen);
    166      1.1  cgd 		memmove((char *)h + BTDATAOFF, p, nb);
    167      1.1  cgd 
    168      1.1  cgd 		if (last) {
    169      1.1  cgd 			last->nextpg = h->pgno;
    170      1.1  cgd 			mpool_put(t->bt_mp, last, MPOOL_DIRTY);
    171      1.1  cgd 		} else
    172      1.1  cgd 			*pg = h->pgno;
    173      1.1  cgd 
    174      1.1  cgd 		if ((sz -= nb) == 0) {
    175      1.1  cgd 			mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    176      1.1  cgd 			break;
    177      1.1  cgd 		}
    178      1.1  cgd 	}
    179      1.1  cgd 	return (RET_SUCCESS);
    180      1.1  cgd }
    181      1.1  cgd 
    182      1.1  cgd /*
    183      1.1  cgd  * __OVFL_DELETE -- Delete an overflow chain.
    184      1.1  cgd  *
    185      1.1  cgd  * Parameters:
    186      1.1  cgd  *	t:	tree
    187  1.1.1.1  cgd  *	p:	pointer to { pgno_t, u_int32_t }
    188      1.1  cgd  *
    189      1.1  cgd  * Returns:
    190      1.1  cgd  *	RET_ERROR, RET_SUCCESS
    191      1.1  cgd  */
    192      1.1  cgd int
    193      1.1  cgd __ovfl_delete(t, p)
    194      1.1  cgd 	BTREE *t;
    195      1.1  cgd 	void *p;
    196      1.1  cgd {
    197      1.1  cgd 	PAGE *h;
    198      1.1  cgd 	pgno_t pg;
    199  1.1.1.1  cgd 	size_t plen;
    200  1.1.1.1  cgd 	u_int32_t sz;
    201      1.1  cgd 
    202      1.1  cgd 	memmove(&pg, p, sizeof(pgno_t));
    203  1.1.1.1  cgd 	memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t));
    204      1.1  cgd 
    205      1.1  cgd #ifdef DEBUG
    206      1.1  cgd 	if (pg == P_INVALID || sz == 0)
    207      1.1  cgd 		abort();
    208      1.1  cgd #endif
    209      1.1  cgd 	if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    210      1.1  cgd 		return (RET_ERROR);
    211      1.1  cgd 
    212      1.1  cgd 	/* Don't delete chains used by internal pages. */
    213      1.1  cgd 	if (h->flags & P_PRESERVE) {
    214      1.1  cgd 		mpool_put(t->bt_mp, h, 0);
    215      1.1  cgd 		return (RET_SUCCESS);
    216      1.1  cgd 	}
    217      1.1  cgd 
    218      1.1  cgd 	/* Step through the chain, calling the free routine for each page. */
    219      1.1  cgd 	for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) {
    220      1.1  cgd 		pg = h->nextpg;
    221      1.1  cgd 		__bt_free(t, h);
    222      1.1  cgd 		if (sz <= plen)
    223      1.1  cgd 			break;
    224      1.1  cgd 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    225      1.1  cgd 			return (RET_ERROR);
    226      1.1  cgd 	}
    227      1.1  cgd 	return (RET_SUCCESS);
    228      1.1  cgd }
    229