Home | History | Annotate | Line # | Download | only in btree
bt_overflow.c revision 1.17.2.2
      1  1.17.2.2      yamt /*	$NetBSD: bt_overflow.c,v 1.17.2.2 2014/05/22 11:36:51 yamt Exp $	*/
      2       1.5       cgd 
      3       1.1       cgd /*-
      4       1.4       cgd  * Copyright (c) 1990, 1993, 1994
      5       1.1       cgd  *	The Regents of the University of California.  All rights reserved.
      6       1.1       cgd  *
      7       1.1       cgd  * This code is derived from software contributed to Berkeley by
      8       1.1       cgd  * Mike Olson.
      9       1.1       cgd  *
     10       1.1       cgd  * Redistribution and use in source and binary forms, with or without
     11       1.1       cgd  * modification, are permitted provided that the following conditions
     12       1.1       cgd  * are met:
     13       1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     14       1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     15       1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     16       1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     17       1.1       cgd  *    documentation and/or other materials provided with the distribution.
     18      1.12       agc  * 3. Neither the name of the University nor the names of its contributors
     19       1.1       cgd  *    may be used to endorse or promote products derived from this software
     20       1.1       cgd  *    without specific prior written permission.
     21       1.1       cgd  *
     22       1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23       1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24       1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25       1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26       1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27       1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28       1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29       1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30       1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31       1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32       1.1       cgd  * SUCH DAMAGE.
     33       1.1       cgd  */
     34       1.1       cgd 
     35      1.16     joerg #if HAVE_NBTOOL_CONFIG_H
     36      1.16     joerg #include "nbtool_config.h"
     37      1.16     joerg #endif
     38      1.16     joerg 
     39       1.7  christos #include <sys/cdefs.h>
     40  1.17.2.2      yamt __RCSID("$NetBSD: bt_overflow.c,v 1.17.2.2 2014/05/22 11:36:51 yamt Exp $");
     41       1.1       cgd 
     42       1.8       jtc #include "namespace.h"
     43       1.1       cgd #include <sys/param.h>
     44       1.1       cgd 
     45      1.13  christos #include <assert.h>
     46       1.1       cgd #include <stdio.h>
     47       1.1       cgd #include <stdlib.h>
     48       1.1       cgd #include <string.h>
     49       1.1       cgd 
     50       1.1       cgd #include <db.h>
     51       1.1       cgd #include "btree.h"
     52       1.1       cgd 
     53       1.1       cgd /*
     54       1.1       cgd  * Big key/data code.
     55       1.1       cgd  *
     56       1.1       cgd  * Big key and data entries are stored on linked lists of pages.  The initial
     57       1.1       cgd  * reference is a byte string stored with the key or data and is the page number
     58       1.1       cgd  * and size.  The actual record is stored in a chain of pages linked by the
     59       1.1       cgd  * nextpg field of the PAGE header.
     60       1.1       cgd  *
     61       1.1       cgd  * The first page of the chain has a special property.  If the record is used
     62       1.1       cgd  * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set
     63       1.1       cgd  * in the header.
     64       1.1       cgd  *
     65       1.1       cgd  * XXX
     66       1.1       cgd  * A single DBT is written to each chain, so a lot of space on the last page
     67       1.1       cgd  * is wasted.  This is a fairly major bug for some data sets.
     68       1.1       cgd  */
     69       1.1       cgd 
     70       1.1       cgd /*
     71       1.1       cgd  * __OVFL_GET -- Get an overflow key/data item.
     72       1.1       cgd  *
     73       1.1       cgd  * Parameters:
     74       1.1       cgd  *	t:	tree
     75      1.14     joerg  *	p:	pointer to { pgno_t, uint32_t }
     76       1.1       cgd  *	buf:	storage address
     77       1.1       cgd  *	bufsz:	storage size
     78       1.1       cgd  *
     79       1.1       cgd  * Returns:
     80       1.1       cgd  *	RET_ERROR, RET_SUCCESS
     81       1.1       cgd  */
     82       1.1       cgd int
     83      1.13  christos __ovfl_get(BTREE *t, void *p, size_t *ssz, void **buf, size_t *bufsz)
     84       1.1       cgd {
     85       1.1       cgd 	PAGE *h;
     86       1.1       cgd 	pgno_t pg;
     87      1.14     joerg 	uint32_t sz, nb, plen;
     88      1.13  christos 	size_t temp;
     89       1.1       cgd 
     90      1.17  christos 	memmove(&pg, p, sizeof(pg));
     91      1.14     joerg 	memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(uint32_t));
     92       1.1       cgd 	*ssz = sz;
     93       1.1       cgd 
     94       1.1       cgd #ifdef DEBUG
     95       1.1       cgd 	if (pg == P_INVALID || sz == 0)
     96       1.1       cgd 		abort();
     97       1.1       cgd #endif
     98       1.1       cgd 	/* Make the buffer bigger as necessary. */
     99       1.1       cgd 	if (*bufsz < sz) {
    100  1.17.2.2      yamt 		void *nbuf = realloc(*buf, sz);
    101  1.17.2.2      yamt 		if (nbuf == NULL)
    102       1.1       cgd 			return (RET_ERROR);
    103  1.17.2.2      yamt 		*buf = nbuf;
    104       1.1       cgd 		*bufsz = sz;
    105       1.1       cgd 	}
    106       1.1       cgd 
    107       1.1       cgd 	/*
    108       1.1       cgd 	 * Step through the linked list of pages, copying the data on each one
    109       1.1       cgd 	 * into the buffer.  Never copy more than the data's length.
    110       1.1       cgd 	 */
    111      1.13  christos 	temp = t->bt_psize - BTDATAOFF;
    112      1.14     joerg 	_DBFIT(temp, uint32_t);
    113      1.14     joerg 	plen = (uint32_t)temp;
    114       1.1       cgd 	for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) {
    115       1.1       cgd 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    116       1.1       cgd 			return (RET_ERROR);
    117       1.1       cgd 
    118       1.1       cgd 		nb = MIN(sz, plen);
    119       1.9  christos 		memmove(p, (char *)(void *)h + BTDATAOFF, nb);
    120       1.1       cgd 		mpool_put(t->bt_mp, h, 0);
    121       1.1       cgd 
    122       1.1       cgd 		if ((sz -= nb) == 0)
    123       1.1       cgd 			break;
    124       1.1       cgd 	}
    125       1.1       cgd 	return (RET_SUCCESS);
    126       1.1       cgd }
    127       1.1       cgd 
    128       1.1       cgd /*
    129       1.1       cgd  * __OVFL_PUT -- Store an overflow key/data item.
    130       1.1       cgd  *
    131       1.1       cgd  * Parameters:
    132       1.1       cgd  *	t:	tree
    133       1.1       cgd  *	data:	DBT to store
    134       1.1       cgd  *	pgno:	storage page number
    135       1.1       cgd  *
    136       1.1       cgd  * Returns:
    137       1.1       cgd  *	RET_ERROR, RET_SUCCESS
    138       1.1       cgd  */
    139       1.1       cgd int
    140      1.13  christos __ovfl_put(BTREE *t, const DBT *dbt, pgno_t *pg)
    141       1.1       cgd {
    142       1.1       cgd 	PAGE *h, *last;
    143       1.1       cgd 	void *p;
    144       1.1       cgd 	pgno_t npg;
    145      1.14     joerg 	uint32_t sz, nb, plen;
    146      1.13  christos 	size_t temp;
    147       1.1       cgd 
    148       1.1       cgd 	/*
    149       1.1       cgd 	 * Allocate pages and copy the key/data record into them.  Store the
    150       1.1       cgd 	 * number of the first page in the chain.
    151       1.1       cgd 	 */
    152      1.13  christos 	temp = t->bt_psize - BTDATAOFF;
    153      1.14     joerg 	_DBFIT(temp, uint32_t);
    154      1.14     joerg 	plen = (uint32_t)temp;
    155      1.13  christos 	last = NULL;
    156      1.13  christos 	p = dbt->data;
    157      1.13  christos 	temp = dbt->size;
    158      1.14     joerg 	_DBFIT(temp, uint32_t);
    159  1.17.2.1      yamt 	sz = (uint32_t)temp;
    160      1.13  christos 	for (;; p = (char *)p + plen, last = h) {
    161       1.1       cgd 		if ((h = __bt_new(t, &npg)) == NULL)
    162       1.1       cgd 			return (RET_ERROR);
    163       1.1       cgd 
    164       1.1       cgd 		h->pgno = npg;
    165       1.1       cgd 		h->nextpg = h->prevpg = P_INVALID;
    166       1.1       cgd 		h->flags = P_OVERFLOW;
    167       1.1       cgd 		h->lower = h->upper = 0;
    168       1.1       cgd 
    169       1.1       cgd 		nb = MIN(sz, plen);
    170      1.13  christos 		(void)memmove((char *)(void *)h + BTDATAOFF, p, (size_t)nb);
    171       1.1       cgd 
    172       1.1       cgd 		if (last) {
    173       1.1       cgd 			last->nextpg = h->pgno;
    174       1.1       cgd 			mpool_put(t->bt_mp, last, MPOOL_DIRTY);
    175       1.1       cgd 		} else
    176       1.1       cgd 			*pg = h->pgno;
    177       1.1       cgd 
    178       1.1       cgd 		if ((sz -= nb) == 0) {
    179       1.1       cgd 			mpool_put(t->bt_mp, h, MPOOL_DIRTY);
    180       1.1       cgd 			break;
    181       1.1       cgd 		}
    182       1.1       cgd 	}
    183       1.1       cgd 	return (RET_SUCCESS);
    184       1.1       cgd }
    185       1.1       cgd 
    186       1.1       cgd /*
    187       1.1       cgd  * __OVFL_DELETE -- Delete an overflow chain.
    188       1.1       cgd  *
    189       1.1       cgd  * Parameters:
    190       1.1       cgd  *	t:	tree
    191      1.14     joerg  *	p:	pointer to { pgno_t, uint32_t }
    192       1.1       cgd  *
    193       1.1       cgd  * Returns:
    194       1.1       cgd  *	RET_ERROR, RET_SUCCESS
    195       1.1       cgd  */
    196       1.1       cgd int
    197      1.13  christos __ovfl_delete(BTREE *t, void *p)
    198       1.1       cgd {
    199       1.1       cgd 	PAGE *h;
    200       1.1       cgd 	pgno_t pg;
    201      1.14     joerg 	uint32_t sz, plen;
    202      1.13  christos 	size_t temp;
    203       1.1       cgd 
    204      1.17  christos 	(void)memmove(&pg, p, sizeof(pg));
    205      1.14     joerg 	(void)memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(uint32_t));
    206       1.1       cgd 
    207       1.1       cgd #ifdef DEBUG
    208       1.1       cgd 	if (pg == P_INVALID || sz == 0)
    209       1.1       cgd 		abort();
    210       1.1       cgd #endif
    211       1.1       cgd 	if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    212       1.1       cgd 		return (RET_ERROR);
    213       1.1       cgd 
    214       1.1       cgd 	/* Don't delete chains used by internal pages. */
    215       1.1       cgd 	if (h->flags & P_PRESERVE) {
    216       1.1       cgd 		mpool_put(t->bt_mp, h, 0);
    217       1.1       cgd 		return (RET_SUCCESS);
    218       1.1       cgd 	}
    219       1.1       cgd 
    220       1.1       cgd 	/* Step through the chain, calling the free routine for each page. */
    221      1.13  christos 	temp = t->bt_psize - BTDATAOFF;
    222      1.14     joerg 	_DBFIT(temp, uint32_t);
    223      1.14     joerg 	plen = (uint32_t)temp;
    224      1.13  christos 	for (;; sz -= plen) {
    225       1.1       cgd 		pg = h->nextpg;
    226       1.1       cgd 		__bt_free(t, h);
    227       1.1       cgd 		if (sz <= plen)
    228       1.1       cgd 			break;
    229       1.1       cgd 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
    230       1.1       cgd 			return (RET_ERROR);
    231       1.1       cgd 	}
    232       1.1       cgd 	return (RET_SUCCESS);
    233       1.1       cgd }
    234