Home | History | Annotate | Line # | Download | only in pppd
      1 /*	NetBSD: tdb.c,v 1.7 2013/10/19 17:16:38 christos Exp 	*/
      2 
      3 /*
      4  * Database functions
      5  * Copyright (C) Andrew Tridgell 1999
      6  *
      7  * Redistribution and use in source and binary forms are permitted
      8  * provided that the above copyright notice and this paragraph are
      9  * duplicated in all such forms AND provided that this software or
     10  * any derived work is only used as part of the PPP daemon (pppd)
     11  * and related utilities.
     12  * The name of the author may not be used to endorse or promote products
     13  * derived from this software without specific prior written permission.
     14  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
     15  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
     16  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
     17  *
     18  * Note: this software is also available under the Gnu Public License
     19  * version 2 or later.
     20  */
     21 
     22 #include <sys/cdefs.h>
     23 #ifndef lint
     24 __RCSID("NetBSD: tdb.c,v 1.7 2013/10/19 17:16:38 christos Exp ");
     25 #endif
     26 
     27 #include <stdlib.h>
     28 #include <stdio.h>
     29 #include <fcntl.h>
     30 #include <unistd.h>
     31 #include <string.h>
     32 #include <errno.h>
     33 #include <sys/mman.h>
     34 #include <sys/stat.h>
     35 #include "tdb.h"
     36 
     37 #define TDB_VERSION (0x26011967 + 1)
     38 #define TDB_MAGIC (0x26011999U)
     39 #define TDB_FREE_MAGIC (~TDB_MAGIC)
     40 #define TDB_ALIGN 4
     41 #define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGN)
     42 #define DEFAULT_HASH_SIZE 128
     43 #define TDB_PAGE_SIZE 0x2000
     44 #define TDB_LEN_MULTIPLIER 10
     45 #define FREELIST_TOP (sizeof(struct tdb_header))
     46 
     47 #define LOCK_SET 1
     48 #define LOCK_CLEAR 0
     49 
     50 /* lock offsets */
     51 #define GLOBAL_LOCK 0
     52 #define ACTIVE_LOCK 4
     53 #define LIST_LOCK_BASE 1024
     54 
     55 #define BUCKET(hash) ((hash) % tdb->header.hash_size)
     56 
     57 #ifndef MAP_FILE
     58 #define MAP_FILE 0
     59 #endif
     60 
     61 /* the body of the database is made of one list_struct for the free space
     62    plus a separate data list for each hash value */
     63 struct list_struct {
     64 	tdb_len rec_len; /* total byte length of record */
     65 	tdb_off next; /* offset of the next record in the list */
     66 	tdb_len key_len; /* byte length of key */
     67 	tdb_len data_len; /* byte length of data */
     68 	unsigned full_hash; /* the full 32 bit hash of the key */
     69 	unsigned magic;   /* try to catch errors */
     70 	/*
     71 	   the following union is implied
     72 	   union {
     73               char record[rec_len];
     74 	      struct {
     75 	        char key[key_len];
     76 		char data[data_len];
     77 	      }
     78            }
     79 	*/
     80 };
     81 
     82 /* a null data record - useful for error returns */
     83 static TDB_DATA null_data;
     84 
     85 static int tdb_update __P((TDB_CONTEXT *, TDB_DATA, TDB_DATA));
     86 #ifdef notyet
     87 static int tdb_lockchain __P((TDB_CONTEXT *, TDB_DATA));
     88 static int tdb_unlockchain __P((TDB_CONTEXT *, TDB_DATA));
     89 #endif
     90 
     91 /* a byte range locking function - return 0 on success
     92    this functions locks/unlocks 1 byte at the specified offset */
     93 static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset,
     94 		      int set, int rw_type, int lck_type)
     95 {
     96 #if NOLOCK
     97 	return 0;
     98 #else
     99 	struct flock fl;
    100 
    101         if (tdb->fd == -1) return 0;   /* for in memory tdb */
    102 
    103 	if (tdb->read_only) return -1;
    104 
    105 	fl.l_type = set==LOCK_SET?rw_type:F_UNLCK;
    106 	fl.l_whence = SEEK_SET;
    107 	fl.l_start = offset;
    108 	fl.l_len = 1;
    109 	fl.l_pid = 0;
    110 
    111 	if (fcntl(tdb->fd, lck_type, &fl) != 0) {
    112 #if TDB_DEBUG
    113 		if (lck_type == F_SETLKW) {
    114 			printf("lock %d failed at %d (%s)\n",
    115 			       set, offset, strerror(errno));
    116 		}
    117 #endif
    118 		tdb->ecode = TDB_ERR_LOCK;
    119 		return -1;
    120 	}
    121 	return 0;
    122 #endif
    123 }
    124 
    125 /* lock a list in the database. list -1 is the alloc list */
    126 static int tdb_lock(TDB_CONTEXT *tdb, int list)
    127 {
    128 	if (list < -1 || list >= (int)tdb->header.hash_size) {
    129 #if TDB_DEBUG
    130 		printf("bad list %d\n", list);
    131 #endif
    132 		return -1;
    133 	}
    134 	if (tdb->locked[list+1] == 0) {
    135 		if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_SET,
    136 			       F_WRLCK, F_SETLKW) != 0) {
    137 			return -1;
    138 		}
    139 	}
    140 	tdb->locked[list+1]++;
    141 	return 0;
    142 }
    143 
    144 /* unlock the database. */
    145 static int tdb_unlock(TDB_CONTEXT *tdb, int list)
    146 {
    147 	if (list < -1 || list >= (int)tdb->header.hash_size) {
    148 #if TDB_DEBUG
    149 		printf("bad unlock list %d\n", list);
    150 #endif
    151 		return -1;
    152 	}
    153 
    154 	if (tdb->locked[list+1] == 0) {
    155 #if TDB_DEBUG
    156 		printf("not locked %d\n", list);
    157 #endif
    158 		tdb->ecode = TDB_ERR_LOCK;
    159 		return -1;
    160 	}
    161 	if (tdb->locked[list+1] == 1) {
    162 		if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_CLEAR,
    163 			       F_WRLCK, F_SETLKW) != 0) {
    164 			return -1;
    165 		}
    166 	}
    167 	tdb->locked[list+1]--;
    168 	return 0;
    169 }
    170 
    171 /* the hash algorithm - turn a key into an integer
    172    This is based on the hash agorithm from gdbm */
    173 static unsigned tdb_hash(TDB_DATA *key)
    174 {
    175 	unsigned value;	/* Used to compute the hash value.  */
    176 	unsigned   i;	/* Used to cycle through random values. */
    177 
    178 	/* Set the initial value from the key size. */
    179 	value = 0x238F13AF * key->dsize;
    180 	for (i=0; i < key->dsize; i++) {
    181 		value = (value + (key->dptr[i] << (i*5 % 24)));
    182 	}
    183 
    184 	value = (1103515243 * value + 12345);
    185 
    186 	return value;
    187 }
    188 
    189 /* find the top of the hash chain for an open database */
    190 static tdb_off tdb_hash_top(TDB_CONTEXT *tdb, unsigned hash)
    191 {
    192 	tdb_off ret;
    193 	hash = BUCKET(hash);
    194 	ret = FREELIST_TOP + (hash+1)*sizeof(tdb_off);
    195 	return ret;
    196 }
    197 
    198 
    199 /* check for an out of bounds access - if it is out of bounds then
    200    see if the database has been expanded by someone else and expand
    201    if necessary */
    202 static int tdb_oob(TDB_CONTEXT *tdb, tdb_off offset)
    203 {
    204 	struct stat st;
    205 	if ((offset <= tdb->map_size) || (tdb->fd == -1)) return 0;
    206 
    207 	fstat(tdb->fd, &st);
    208 	if (st.st_size <= (ssize_t)offset) {
    209 		tdb->ecode = TDB_ERR_IO;
    210 		return -1;
    211 	}
    212 
    213 #if HAVE_MMAP
    214 	if (tdb->map_ptr) {
    215 		munmap(tdb->map_ptr, tdb->map_size);
    216 		tdb->map_ptr = NULL;
    217 	}
    218 #endif
    219 
    220 	tdb->map_size = st.st_size;
    221 #if HAVE_MMAP
    222 	tdb->map_ptr = (void *)mmap(NULL, tdb->map_size,
    223 				    tdb->read_only?PROT_READ:PROT_READ|PROT_WRITE,
    224 				    MAP_SHARED | MAP_FILE, tdb->fd, 0);
    225 	if (tdb->map_ptr == MAP_FAILED)
    226 		tdb->map_ptr = NULL;
    227 #endif
    228 	return 0;
    229 }
    230 
    231 
    232 /* write a lump of data at a specified offset */
    233 static int tdb_write(TDB_CONTEXT *tdb, tdb_off offset, const char *buf, tdb_len len)
    234 {
    235 	if (tdb_oob(tdb, offset + len) != 0) {
    236 		/* oops - trying to write beyond the end of the database! */
    237 		return -1;
    238 	}
    239 
    240 	if (tdb->map_ptr) {
    241 		memcpy(offset + (char *)tdb->map_ptr, buf, len);
    242 	} else {
    243 		if (lseek(tdb->fd, offset, SEEK_SET) != offset ||
    244 		    write(tdb->fd, buf, len) != (ssize_t)len) {
    245 			tdb->ecode = TDB_ERR_IO;
    246 			return -1;
    247 		}
    248 	}
    249 	return 0;
    250 }
    251 
    252 /* read a lump of data at a specified offset */
    253 static int tdb_read(TDB_CONTEXT *tdb, tdb_off offset, char *buf, tdb_len len)
    254 {
    255 	if (tdb_oob(tdb, offset + len) != 0) {
    256 		/* oops - trying to read beyond the end of the database! */
    257 		return -1;
    258 	}
    259 
    260 	if (tdb->map_ptr) {
    261 		memcpy(buf, offset + (char *)tdb->map_ptr, len);
    262 	} else {
    263 		if (lseek(tdb->fd, offset, SEEK_SET) != offset ||
    264 		    read(tdb->fd, buf, len) != (ssize_t)len) {
    265 			tdb->ecode = TDB_ERR_IO;
    266 			return -1;
    267 		}
    268 	}
    269 	return 0;
    270 }
    271 
    272 
    273 /* read a lump of data, allocating the space for it */
    274 static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len)
    275 {
    276 	char *buf;
    277 
    278 	buf = (char *)malloc(len);
    279 
    280 	if (!buf) {
    281 		tdb->ecode = TDB_ERR_OOM;
    282 		return NULL;
    283 	}
    284 
    285 	if (tdb_read(tdb, offset, buf, len) == -1) {
    286 		free(buf);
    287 		return NULL;
    288 	}
    289 
    290 	return buf;
    291 }
    292 
    293 /* convenience routine for writing a record */
    294 static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
    295 {
    296 	return tdb_write(tdb, offset, (char *)rec, sizeof(*rec));
    297 }
    298 
    299 /* convenience routine for writing a tdb_off */
    300 static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
    301 {
    302 	return tdb_write(tdb, offset, (char *)d, sizeof(*d));
    303 }
    304 
    305 /* read a tdb_off from the store */
    306 static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
    307 {
    308 	return tdb_read(tdb, offset, (char *)d, sizeof(*d));
    309 }
    310 
    311 /* read a record and check for simple errors */
    312 static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
    313 {
    314 	if (tdb_read(tdb, offset, (char *)rec, sizeof(*rec)) == -1) return -1;
    315 	if (rec->magic != TDB_MAGIC) {
    316 #if TDB_DEBUG
    317 		printf("bad magic 0x%08x at offset %d\n",
    318 		       rec->magic, offset);
    319 #endif
    320 		tdb->ecode = TDB_ERR_CORRUPT;
    321 		return -1;
    322 	}
    323 	if (tdb_oob(tdb, rec->next) != 0) {
    324 		return -1;
    325 	}
    326 	return 0;
    327 }
    328 
    329 /* expand the database at least length bytes by expanding the
    330    underlying file and doing the mmap again if necessary */
    331 static int tdb_expand(TDB_CONTEXT *tdb, tdb_off length)
    332 {
    333 	struct list_struct rec;
    334 	tdb_off offset, ptr;
    335 	char b = 0;
    336 
    337 	tdb_lock(tdb,-1);
    338 
    339 	/* make sure we know about any previous expansions by another
    340            process */
    341 	tdb_oob(tdb,tdb->map_size + 1);
    342 
    343 	/* always make room for at least 10 more records */
    344 	length *= TDB_LEN_MULTIPLIER;
    345 
    346 	/* and round the database up to a multiple of TDB_PAGE_SIZE */
    347 	length = ((tdb->map_size + length + TDB_PAGE_SIZE) & ~(TDB_PAGE_SIZE - 1)) - tdb->map_size;
    348 
    349 	/* expand the file itself */
    350         if (tdb->fd != -1) {
    351             lseek(tdb->fd, tdb->map_size + length - 1, SEEK_SET);
    352             if (write(tdb->fd, &b, 1) != 1) goto fail;
    353         }
    354 
    355 	/* form a new freelist record */
    356 	offset = FREELIST_TOP;
    357 	rec.rec_len = length - sizeof(rec);
    358 	rec.magic = TDB_FREE_MAGIC;
    359 	if (ofs_read(tdb, offset, &rec.next) == -1) {
    360 		goto fail;
    361 	}
    362 
    363 #if HAVE_MMAP
    364 	if (tdb->fd != -1 && tdb->map_ptr) {
    365 		munmap(tdb->map_ptr, tdb->map_size);
    366 		tdb->map_ptr = NULL;
    367 	}
    368 #endif
    369 
    370 	tdb->map_size += length;
    371 
    372         if (tdb->fd == -1) {
    373 		void *n;
    374 		n = realloc(tdb->map_ptr, tdb->map_size);
    375 		if (!n)
    376 			goto fail;
    377 		tdb->map_ptr = n;
    378         }
    379 
    380 	/* write it out */
    381 	if (rec_write(tdb, tdb->map_size - length, &rec) == -1) {
    382 		goto fail;
    383 	}
    384 
    385 	/* link it into the free list */
    386 	ptr = tdb->map_size - length;
    387 	if (ofs_write(tdb, offset, &ptr) == -1) goto fail;
    388 
    389 #if HAVE_MMAP
    390         if (tdb->fd != -1) {
    391             tdb->map_ptr = (void *)mmap(NULL, tdb->map_size,
    392                                         PROT_READ|PROT_WRITE,
    393                                         MAP_SHARED | MAP_FILE, tdb->fd, 0);
    394 	    if (tdb->map_ptr == MAP_FAILED)
    395 		    tdb->map_ptr = NULL;
    396         }
    397 #endif
    398 
    399 	tdb_unlock(tdb, -1);
    400 	return 0;
    401 
    402  fail:
    403 	tdb_unlock(tdb,-1);
    404 	return -1;
    405 }
    406 
    407 /* allocate some space from the free list. The offset returned points
    408    to a unconnected list_struct within the database with room for at
    409    least length bytes of total data
    410 
    411    0 is returned if the space could not be allocated
    412  */
    413 static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length)
    414 {
    415 	tdb_off offset, rec_ptr, last_ptr;
    416 	struct list_struct rec, lastrec, newrec;
    417 
    418 	tdb_lock(tdb, -1);
    419 
    420  again:
    421 	last_ptr = 0;
    422 	offset = FREELIST_TOP;
    423 
    424 	/* read in the freelist top */
    425 	if (ofs_read(tdb, offset, &rec_ptr) == -1) {
    426 		goto fail;
    427 	}
    428 
    429 	/* keep looking until we find a freelist record that is big
    430            enough */
    431 	while (rec_ptr) {
    432 		if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) {
    433 			goto fail;
    434 		}
    435 
    436 		if (rec.magic != TDB_FREE_MAGIC) {
    437 #if TDB_DEBUG
    438 			printf("bad magic 0x%08x in free list\n", rec.magic);
    439 #endif
    440 			goto fail;
    441 		}
    442 
    443 		if (rec.rec_len >= length) {
    444 			/* found it - now possibly split it up  */
    445 			if (rec.rec_len > length + MIN_REC_SIZE) {
    446 				length = (length + TDB_ALIGN) & ~(TDB_ALIGN-1);
    447 
    448 				newrec.rec_len = rec.rec_len - (sizeof(rec) + length);
    449 				newrec.next = rec.next;
    450 				newrec.magic = TDB_FREE_MAGIC;
    451 
    452 				rec.rec_len = length;
    453 				rec.next = rec_ptr + sizeof(rec) + rec.rec_len;
    454 
    455 				if (rec_write(tdb, rec.next, &newrec) == -1) {
    456 					goto fail;
    457 				}
    458 
    459 				if (rec_write(tdb, rec_ptr, &rec) == -1) {
    460 					goto fail;
    461 				}
    462 			}
    463 
    464 			/* remove it from the list */
    465 			if (last_ptr == 0) {
    466 				offset = FREELIST_TOP;
    467 
    468 				if (ofs_write(tdb, offset, &rec.next) == -1) {
    469 					goto fail;
    470 				}
    471 			} else {
    472 				lastrec.next = rec.next;
    473 				if (rec_write(tdb, last_ptr, &lastrec) == -1) {
    474 					goto fail;
    475 				}
    476 			}
    477 
    478 			/* all done - return the new record offset */
    479 			tdb_unlock(tdb, -1);
    480 			return rec_ptr;
    481 		}
    482 
    483 		/* move to the next record */
    484 		lastrec = rec;
    485 		last_ptr = rec_ptr;
    486 		rec_ptr = rec.next;
    487 	}
    488 
    489 	/* we didn't find enough space. See if we can expand the
    490 	   database and if we can then try again */
    491 	if (tdb_expand(tdb, length + sizeof(rec)) == 0) goto again;
    492 
    493  fail:
    494 #if TDB_DEBUG
    495 	printf("tdb_allocate failed for size %u\n", length);
    496 #endif
    497 	tdb_unlock(tdb, -1);
    498 	return 0;
    499 }
    500 
    501 /* initialise a new database with a specified hash size */
    502 static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size)
    503 {
    504 	struct tdb_header header;
    505 	int i, size = 0;
    506 	tdb_off buf[16];
    507 
    508         /* create the header */
    509         memset(&header, 0, sizeof(header));
    510         memcpy(header.magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);
    511         header.version = TDB_VERSION;
    512         header.hash_size = hash_size;
    513         lseek(tdb->fd, 0, SEEK_SET);
    514         ftruncate(tdb->fd, 0);
    515 
    516         if (tdb->fd != -1 && write(tdb->fd, &header, sizeof(header)) !=
    517             sizeof(header)) {
    518             tdb->ecode = TDB_ERR_IO;
    519             return -1;
    520         } else size += sizeof(header);
    521 
    522         /* the freelist and hash pointers */
    523         memset(buf, 0, sizeof(buf));
    524 
    525         for (i=0;(hash_size+1)-i >= 16; i += 16) {
    526             if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(buf)) !=
    527                 sizeof(buf)) {
    528                 tdb->ecode = TDB_ERR_IO;
    529                 return -1;
    530             } else size += sizeof(buf);
    531         }
    532 
    533         for (;i<hash_size+1; i++) {
    534             if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(tdb_off)) !=
    535                 sizeof(tdb_off)) {
    536                 tdb->ecode = TDB_ERR_IO;
    537                 return -1;
    538             } else size += sizeof(tdb_off);
    539         }
    540 
    541         if (tdb->fd == -1) {
    542             tdb->map_ptr = calloc(size, 1);
    543             tdb->map_size = size;
    544             if (tdb->map_ptr == NULL) {
    545                 tdb->ecode = TDB_ERR_IO;
    546                 return -1;
    547             }
    548             memcpy(&tdb->header, &header, sizeof(header));
    549         }
    550 
    551 #if TDB_DEBUG
    552 	printf("initialised database of hash_size %u\n",
    553 	       hash_size);
    554 #endif
    555 	return 0;
    556 }
    557 
    558 /* Returns 0 on fail.  On success, return offset of record, and fills
    559    in rec */
    560 static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, unsigned int hash,
    561 			struct list_struct *rec)
    562 {
    563 	tdb_off offset, rec_ptr;
    564 
    565 	/* find the top of the hash chain */
    566 	offset = tdb_hash_top(tdb, hash);
    567 
    568 	/* read in the hash top */
    569 	if (ofs_read(tdb, offset, &rec_ptr) == -1)
    570 		return 0;
    571 
    572 	/* keep looking until we find the right record */
    573 	while (rec_ptr) {
    574 		if (rec_read(tdb, rec_ptr, rec) == -1)
    575 			return 0;
    576 
    577 		if (hash == rec->full_hash && key.dsize == rec->key_len) {
    578 			char *k;
    579 			/* a very likely hit - read the key */
    580 			k = tdb_alloc_read(tdb, rec_ptr + sizeof(*rec),
    581 					   rec->key_len);
    582 
    583 			if (!k)
    584 				return 0;
    585 
    586 			if (memcmp(key.dptr, k, key.dsize) == 0) {
    587 				free(k);
    588 				return rec_ptr;
    589 			}
    590 			free(k);
    591 		}
    592 
    593 		/* move to the next record */
    594 		rec_ptr = rec->next;
    595 	}
    596 	return 0;
    597 }
    598 
    599 /*
    600    return an error string for the last tdb error
    601 */
    602 char *tdb_errorstr(TDB_CONTEXT *tdb)
    603 {
    604 	int i;
    605 	static struct {
    606 		enum TDB_ERROR ecode;
    607 		char *estring;
    608 	} emap[] = {
    609 		{TDB_SUCCESS, "Success"},
    610 		{TDB_ERR_CORRUPT, "Corrupt database"},
    611 		{TDB_ERR_IO, "IO Error"},
    612 		{TDB_ERR_LOCK, "Locking error"},
    613 		{TDB_ERR_OOM, "Out of memory"},
    614 		{TDB_ERR_EXISTS, "Record exists"},
    615 		{-1, NULL}};
    616         if (tdb != NULL) {
    617             for (i=0;emap[i].estring;i++) {
    618 		if (tdb->ecode == emap[i].ecode) return emap[i].estring;
    619             }
    620         } else {
    621             return "Invalid tdb context";
    622         }
    623 	return "Invalid error code";
    624 }
    625 
    626 
    627 /* update an entry in place - this only works if the new data size
    628    is <= the old data size and the key exists.
    629    on failure return -1
    630 */
    631 static int tdb_update(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf)
    632 {
    633 	unsigned hash;
    634 	struct list_struct rec;
    635 	tdb_off rec_ptr;
    636 	int ret = -1;
    637 
    638         if (tdb == NULL) {
    639 #ifdef TDB_DEBUG
    640             printf("tdb_update() called with null context\n");
    641 #endif
    642             return -1;
    643         }
    644 
    645 	/* find which hash bucket it is in */
    646 	hash = tdb_hash(&key);
    647 
    648 	tdb_lock(tdb, BUCKET(hash));
    649 	rec_ptr = tdb_find(tdb, key, hash, &rec);
    650 
    651 	if (!rec_ptr)
    652 		goto out;
    653 
    654 	/* must be long enough */
    655 	if (rec.rec_len < key.dsize + dbuf.dsize)
    656 		goto out;
    657 
    658 	if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
    659 		      dbuf.dptr, dbuf.dsize) == -1)
    660 		goto out;
    661 
    662 	if (dbuf.dsize != rec.data_len) {
    663 		/* update size */
    664 		rec.data_len = dbuf.dsize;
    665 		ret = rec_write(tdb, rec_ptr, &rec);
    666 	} else
    667 		ret = 0;
    668 
    669  out:
    670 	tdb_unlock(tdb, BUCKET(hash));
    671 	return ret;
    672 }
    673 
    674 /* find an entry in the database given a key */
    675 TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key)
    676 {
    677 	unsigned hash;
    678 	tdb_off rec_ptr;
    679 	struct list_struct rec;
    680 	TDB_DATA ret = null_data;
    681 
    682         if (tdb == NULL) {
    683 #ifdef TDB_DEBUG
    684             printf("tdb_fetch() called with null context\n");
    685 #endif
    686             return null_data;
    687         }
    688 
    689 	/* find which hash bucket it is in */
    690 	hash = tdb_hash(&key);
    691 
    692 	tdb_lock(tdb, BUCKET(hash));
    693 	rec_ptr = tdb_find(tdb, key, hash, &rec);
    694 
    695 	if (rec_ptr) {
    696 		ret.dptr = tdb_alloc_read(tdb,
    697 					  rec_ptr + sizeof(rec) + rec.key_len,
    698 					  rec.data_len);
    699 		ret.dsize = rec.data_len;
    700 	}
    701 
    702 	tdb_unlock(tdb, BUCKET(hash));
    703 	return ret;
    704 }
    705 
    706 /* check if an entry in the database exists
    707 
    708    note that 1 is returned if the key is found and 0 is returned if not found
    709    this doesn't match the conventions in the rest of this module, but is
    710    compatible with gdbm
    711 */
    712 int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key)
    713 {
    714 	unsigned hash;
    715 	tdb_off rec_ptr;
    716 	struct list_struct rec;
    717 
    718         if (tdb == NULL) {
    719 #ifdef TDB_DEBUG
    720             printf("tdb_exists() called with null context\n");
    721 #endif
    722             return 0;
    723         }
    724 
    725 	/* find which hash bucket it is in */
    726 	hash = tdb_hash(&key);
    727 
    728 	tdb_lock(tdb, BUCKET(hash));
    729 	rec_ptr = tdb_find(tdb, key, hash, &rec);
    730 	tdb_unlock(tdb, BUCKET(hash));
    731 
    732 	return rec_ptr != 0;
    733 }
    734 
    735 /* traverse the entire database - calling fn(tdb, key, data) on each element.
    736    return -1 on error or the record count traversed
    737    if fn is NULL then it is not called
    738    a non-zero return value from fn() indicates that the traversal should stop
    739   */
    740 int tdb_traverse(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, void* state), void* state)
    741 {
    742 	int count = 0;
    743 	unsigned h;
    744 	tdb_off offset, rec_ptr;
    745 	struct list_struct rec;
    746 	char *data;
    747 	TDB_DATA key, dbuf;
    748 
    749         if (tdb == NULL) {
    750 #ifdef TDB_DEBUG
    751             printf("tdb_traverse() called with null context\n");
    752 #endif
    753             return -1;
    754         }
    755 
    756 	/* loop over all hash chains */
    757 	for (h = 0; h < tdb->header.hash_size; h++) {
    758 		tdb_lock(tdb, BUCKET(h));
    759 
    760 		/* read in the hash top */
    761 		offset = tdb_hash_top(tdb, h);
    762 		if (ofs_read(tdb, offset, &rec_ptr) == -1) {
    763 			goto fail;
    764 		}
    765 
    766 		/* traverse all records for this hash */
    767 		while (rec_ptr) {
    768 			if (rec_read(tdb, rec_ptr, &rec) == -1) {
    769 				goto fail;
    770 			}
    771 
    772 			/* now read the full record */
    773 			data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec),
    774 					     rec.key_len + rec.data_len);
    775 			if (!data) {
    776 				goto fail;
    777 			}
    778 
    779 			key.dptr = data;
    780 			key.dsize = rec.key_len;
    781 			dbuf.dptr = data + rec.key_len;
    782 			dbuf.dsize = rec.data_len;
    783 			count++;
    784 
    785 			if (fn && fn(tdb, key, dbuf, state) != 0) {
    786 				/* they want us to stop traversing */
    787 				free(data);
    788 				tdb_unlock(tdb, BUCKET(h));
    789 				return count;
    790 			}
    791 
    792 			/* a miss - drat */
    793 			free(data);
    794 
    795 			/* move to the next record */
    796 			rec_ptr = rec.next;
    797 		}
    798 		tdb_unlock(tdb, BUCKET(h));
    799 	}
    800 
    801 	/* return the number traversed */
    802 	return count;
    803 
    804  fail:
    805 	tdb_unlock(tdb, BUCKET(h));
    806 	return -1;
    807 }
    808 
    809 
    810 /* find the first entry in the database and return its key */
    811 TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb)
    812 {
    813 	tdb_off offset, rec_ptr;
    814 	struct list_struct rec;
    815 	unsigned hash;
    816 	TDB_DATA ret;
    817 
    818         if (tdb == NULL) {
    819 #ifdef TDB_DEBUG
    820             printf("tdb_firstkey() called with null context\n");
    821 #endif
    822             return null_data;
    823         }
    824 
    825 	/* look for a non-empty hash chain */
    826 	for (hash = 0, rec_ptr = 0;
    827 	     hash < tdb->header.hash_size;
    828 	     hash++) {
    829 		/* find the top of the hash chain */
    830 		offset = tdb_hash_top(tdb, hash);
    831 
    832 		tdb_lock(tdb, BUCKET(hash));
    833 
    834 		/* read in the hash top */
    835 		if (ofs_read(tdb, offset, &rec_ptr) == -1) {
    836 			goto fail;
    837 		}
    838 
    839 		if (rec_ptr) break;
    840 
    841 		tdb_unlock(tdb, BUCKET(hash));
    842 	}
    843 
    844 	if (rec_ptr == 0) return null_data;
    845 
    846 	/* we've found a non-empty chain, now read the record */
    847 	if (rec_read(tdb, rec_ptr, &rec) == -1) {
    848 		goto fail;
    849 	}
    850 
    851 	/* allocate and read the key space */
    852 	ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len);
    853 	ret.dsize = rec.key_len;
    854 	tdb_unlock(tdb, BUCKET(hash));
    855 	return ret;
    856 
    857  fail:
    858 	tdb_unlock(tdb, BUCKET(hash));
    859 	return null_data;
    860 }
    861 
    862 /* find the next entry in the database, returning its key */
    863 TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA key)
    864 {
    865 	unsigned hash, hbucket;
    866 	tdb_off rec_ptr, offset;
    867 	struct list_struct rec;
    868 	TDB_DATA ret;
    869 
    870         if (tdb == NULL) {
    871 #ifdef TDB_DEBUG
    872             printf("tdb_nextkey() called with null context\n");
    873 #endif
    874             return null_data;
    875         }
    876 
    877 	/* find which hash bucket it is in */
    878 	hash = tdb_hash(&key);
    879 	hbucket = BUCKET(hash);
    880 
    881 	tdb_lock(tdb, hbucket);
    882 	rec_ptr = tdb_find(tdb, key, hash, &rec);
    883 	if (rec_ptr) {
    884 		/* we want the next record after this one */
    885 		rec_ptr = rec.next;
    886 	}
    887 
    888 	/* not found or last in hash: look for next non-empty hash chain */
    889 	while (rec_ptr == 0) {
    890 		tdb_unlock(tdb, hbucket);
    891 
    892 		if (++hbucket >= tdb->header.hash_size - 1)
    893 			return null_data;
    894 
    895 		offset = tdb_hash_top(tdb, hbucket);
    896 		tdb_lock(tdb, hbucket);
    897 		/* read in the hash top */
    898 		if (ofs_read(tdb, offset, &rec_ptr) == -1) {
    899 			tdb_unlock(tdb, hbucket);
    900 			return null_data;
    901 		}
    902 	}
    903 
    904 	/* Read the record. */
    905 	if (rec_read(tdb, rec_ptr, &rec) == -1) {
    906 		tdb_unlock(tdb, hbucket);
    907 		return null_data;
    908 	}
    909 	/* allocate and read the key */
    910 	ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len);
    911 	ret.dsize = rec.key_len;
    912 	tdb_unlock(tdb, hbucket);
    913 
    914 	return ret;
    915 }
    916 
    917 /* delete an entry in the database given a key */
    918 int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key)
    919 {
    920 	unsigned hash;
    921 	tdb_off offset, rec_ptr, last_ptr;
    922 	struct list_struct rec, lastrec;
    923 	char *data = NULL;
    924 
    925         if (tdb == NULL) {
    926 #ifdef TDB_DEBUG
    927             printf("tdb_delete() called with null context\n");
    928 #endif
    929             return -1;
    930         }
    931 
    932 	/* find which hash bucket it is in */
    933 	hash = tdb_hash(&key);
    934 
    935 	tdb_lock(tdb, BUCKET(hash));
    936 
    937 	/* find the top of the hash chain */
    938 	offset = tdb_hash_top(tdb, hash);
    939 
    940 	/* read in the hash top */
    941 	if (ofs_read(tdb, offset, &rec_ptr) == -1) {
    942 		goto fail;
    943 	}
    944 
    945 	last_ptr = 0;
    946 
    947 	/* keep looking until we find the right record */
    948 	while (rec_ptr) {
    949 		if (rec_read(tdb, rec_ptr, &rec) == -1) {
    950 			goto fail;
    951 		}
    952 
    953 		if (hash == rec.full_hash && key.dsize == rec.key_len) {
    954 			/* a very likely hit - read the record and full key */
    955 			data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec),
    956 					     rec.key_len);
    957 			if (!data) {
    958 				goto fail;
    959 			}
    960 
    961 			if (memcmp(key.dptr, data, key.dsize) == 0) {
    962 				/* a definite match - delete it */
    963 				if (last_ptr == 0) {
    964 					offset = tdb_hash_top(tdb, hash);
    965 					if (ofs_write(tdb, offset, &rec.next) == -1) {
    966 						goto fail;
    967 					}
    968 				} else {
    969 					lastrec.next = rec.next;
    970 					if (rec_write(tdb, last_ptr, &lastrec) == -1) {
    971 						goto fail;
    972 					}
    973 				}
    974 				tdb_unlock(tdb, BUCKET(hash));
    975 				tdb_lock(tdb, -1);
    976 				/* and recover the space */
    977 				offset = FREELIST_TOP;
    978 				if (ofs_read(tdb, offset, &rec.next) == -1) {
    979 					goto fail2;
    980 				}
    981 				rec.magic = TDB_FREE_MAGIC;
    982 				if (rec_write(tdb, rec_ptr, &rec) == -1) {
    983 					goto fail2;
    984 				}
    985 				if (ofs_write(tdb, offset, &rec_ptr) == -1) {
    986 					goto fail2;
    987 				}
    988 
    989 				/* yipee - all done */
    990 				free(data);
    991 				tdb_unlock(tdb, -1);
    992 				return 0;
    993 			}
    994 
    995 			/* a miss - drat */
    996 			free(data);
    997 			data = NULL;
    998 		}
    999 
   1000 		/* move to the next record */
   1001 		last_ptr = rec_ptr;
   1002 		lastrec = rec;
   1003 		rec_ptr = rec.next;
   1004 	}
   1005 
   1006  fail:
   1007 	if (data) free(data);
   1008 	tdb_unlock(tdb, BUCKET(hash));
   1009 	return -1;
   1010 
   1011  fail2:
   1012 	if (data) free(data);
   1013 	tdb_unlock(tdb, -1);
   1014 	return -1;
   1015 }
   1016 
   1017 
   1018 /* store an element in the database, replacing any existing element
   1019    with the same key
   1020 
   1021    return 0 on success, -1 on failure
   1022 */
   1023 int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
   1024 {
   1025 	struct list_struct rec;
   1026 	unsigned hash;
   1027 	tdb_off rec_ptr, offset;
   1028 	char *p = NULL;
   1029 
   1030         if (tdb == NULL) {
   1031 #ifdef TDB_DEBUG
   1032             printf("tdb_store() called with null context\n");
   1033 #endif
   1034             return -1;
   1035         }
   1036 
   1037 	/* find which hash bucket it is in */
   1038 	hash = tdb_hash(&key);
   1039 
   1040 	/* check for it existing */
   1041 	if (flag == TDB_INSERT && tdb_exists(tdb, key)) {
   1042 		tdb->ecode = TDB_ERR_EXISTS;
   1043 		return -1;
   1044 	}
   1045 
   1046 	/* first try in-place update */
   1047 	if (flag != TDB_INSERT && tdb_update(tdb, key, dbuf) == 0) {
   1048 		return 0;
   1049 	}
   1050 
   1051 	rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize);
   1052 	if (rec_ptr == 0) {
   1053 		return -1;
   1054 	}
   1055 
   1056 	tdb_lock(tdb, BUCKET(hash));
   1057 
   1058 	/* delete any existing record - if it doesn't exist we don't care */
   1059 	if (flag != TDB_INSERT) {
   1060 		tdb_delete(tdb, key);
   1061 	}
   1062 
   1063 	/* read the newly created record */
   1064 	if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) {
   1065 		goto fail;
   1066 	}
   1067 
   1068 	if (rec.magic != TDB_FREE_MAGIC) goto fail;
   1069 
   1070 	/* find the top of the hash chain */
   1071 	offset = tdb_hash_top(tdb, hash);
   1072 
   1073 	/* read in the hash top diretcly into our next pointer */
   1074 	if (ofs_read(tdb, offset, &rec.next) == -1) {
   1075 		goto fail;
   1076 	}
   1077 
   1078 	rec.key_len = key.dsize;
   1079 	rec.data_len = dbuf.dsize;
   1080 	rec.full_hash = hash;
   1081 	rec.magic = TDB_MAGIC;
   1082 
   1083 	p = (char *)malloc(sizeof(rec) + key.dsize + dbuf.dsize);
   1084 	if (!p) {
   1085 		tdb->ecode = TDB_ERR_OOM;
   1086 		goto fail;
   1087 	}
   1088 
   1089 	memcpy(p, &rec, sizeof(rec));
   1090 	memcpy(p+sizeof(rec), key.dptr, key.dsize);
   1091 	memcpy(p+sizeof(rec)+key.dsize, dbuf.dptr, dbuf.dsize);
   1092 
   1093 	if (tdb_write(tdb, rec_ptr, p, sizeof(rec)+key.dsize+dbuf.dsize) == -1)
   1094 		goto fail;
   1095 
   1096 	free(p);
   1097 	p = NULL;
   1098 
   1099 	/* and point the top of the hash chain at it */
   1100 	if (ofs_write(tdb, offset, &rec_ptr) == -1) goto fail;
   1101 
   1102 	tdb_unlock(tdb, BUCKET(hash));
   1103 	return 0;
   1104 
   1105  fail:
   1106 #if TDB_DEBUG
   1107 	printf("store failed for hash 0x%08x in bucket %u\n", hash, BUCKET(hash));
   1108 #endif
   1109 	if (p) free(p);
   1110 	tdb_unlock(tdb, BUCKET(hash));
   1111 	return -1;
   1112 }
   1113 
   1114 
   1115 /* open the database, creating it if necessary
   1116 
   1117    The open_flags and mode are passed straight to the open call on the database
   1118    file. A flags value of O_WRONLY is invalid
   1119 
   1120    The hash size is advisory, use zero for a default value.
   1121 
   1122    return is NULL on error
   1123 */
   1124 TDB_CONTEXT *tdb_open(char *name, int hash_size, int tdb_flags,
   1125 		      int open_flags, mode_t mode)
   1126 {
   1127 	TDB_CONTEXT tdb, *ret;
   1128 	struct stat st;
   1129 
   1130 	memset(&tdb, 0, sizeof(tdb));
   1131 
   1132 	tdb.fd = -1;
   1133 	tdb.name = NULL;
   1134 	tdb.map_ptr = NULL;
   1135 
   1136 	if ((open_flags & O_ACCMODE) == O_WRONLY) {
   1137 		goto fail;
   1138 	}
   1139 
   1140 	if (hash_size == 0) hash_size = DEFAULT_HASH_SIZE;
   1141 
   1142 	tdb.read_only = ((open_flags & O_ACCMODE) == O_RDONLY);
   1143 
   1144         if (name != NULL) {
   1145             tdb.fd = open(name, open_flags, mode);
   1146             if (tdb.fd == -1) {
   1147 		goto fail;
   1148             }
   1149         }
   1150 
   1151 	/* ensure there is only one process initialising at once */
   1152 	tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_SET, F_WRLCK, F_SETLKW);
   1153 
   1154 	if (tdb_flags & TDB_CLEAR_IF_FIRST) {
   1155 		/* we need to zero the database if we are the only
   1156 		   one with it open */
   1157 		if (tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_WRLCK, F_SETLK) == 0) {
   1158 			ftruncate(tdb.fd, 0);
   1159 			tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLK);
   1160 		}
   1161 	}
   1162 
   1163 	/* leave this lock in place */
   1164 	tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_RDLCK, F_SETLKW);
   1165 
   1166 	if (read(tdb.fd, &tdb.header, sizeof(tdb.header)) != sizeof(tdb.header) ||
   1167 	    strcmp(tdb.header.magic_food, TDB_MAGIC_FOOD) != 0 ||
   1168 	    tdb.header.version != TDB_VERSION) {
   1169 		/* its not a valid database - possibly initialise it */
   1170 		if (!(open_flags & O_CREAT)) {
   1171 			goto fail;
   1172 		}
   1173 		if (tdb_new_database(&tdb, hash_size) == -1) goto fail;
   1174 
   1175 		lseek(tdb.fd, 0, SEEK_SET);
   1176 		if (tdb.fd != -1 && read(tdb.fd, &tdb.header,
   1177                                          sizeof(tdb.header)) !=
   1178                                          sizeof(tdb.header))
   1179                     goto fail;
   1180 	}
   1181 
   1182         if (tdb.fd != -1) {
   1183             fstat(tdb.fd, &st);
   1184 
   1185             /* map the database and fill in the return structure */
   1186             tdb.name = name ? strdup(name) : NULL;
   1187             tdb.map_size = st.st_size;
   1188         }
   1189 
   1190         tdb.locked = (int *)calloc(tdb.header.hash_size+1,
   1191                                    sizeof(tdb.locked[0]));
   1192         if (!tdb.locked) {
   1193             goto fail;
   1194         }
   1195 
   1196 #if HAVE_MMAP
   1197         if (tdb.fd != -1) {
   1198             tdb.map_ptr = (void *)mmap(NULL, st.st_size,
   1199                                        tdb.read_only? PROT_READ : PROT_READ|PROT_WRITE,
   1200                                        MAP_SHARED | MAP_FILE, tdb.fd, 0);
   1201 	    if (tdb->map_ptr == MAP_FAILED)
   1202 		    tdb->map_ptr = NULL;
   1203         }
   1204 #endif
   1205 
   1206 	ret = (TDB_CONTEXT *)malloc(sizeof(tdb));
   1207 	if (!ret) goto fail;
   1208 
   1209 	*ret = tdb;
   1210 
   1211 #if TDB_DEBUG
   1212 	printf("mapped database of hash_size %u map_size=%u\n",
   1213 	       hash_size, tdb.map_size);
   1214 #endif
   1215 
   1216 	tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLKW);
   1217 	return ret;
   1218 
   1219  fail:
   1220         if (tdb.name) free(tdb.name);
   1221 	if (tdb.fd != -1) close(tdb.fd);
   1222 	if (tdb.map_ptr) munmap(tdb.map_ptr, tdb.map_size);
   1223 
   1224 	return NULL;
   1225 }
   1226 
   1227 /* close a database */
   1228 int tdb_close(TDB_CONTEXT *tdb)
   1229 {
   1230 	if (!tdb) return -1;
   1231 
   1232 	if (tdb->name) free(tdb->name);
   1233 	if (tdb->fd != -1) close(tdb->fd);
   1234 	if (tdb->locked) free(tdb->locked);
   1235 
   1236 	if (tdb->map_ptr) {
   1237             if (tdb->fd != -1) {
   1238                 munmap(tdb->map_ptr, tdb->map_size);
   1239             } else {
   1240                 free(tdb->map_ptr);
   1241             }
   1242         }
   1243 
   1244 	memset(tdb, 0, sizeof(*tdb));
   1245 	free(tdb);
   1246 
   1247 	return 0;
   1248 }
   1249 
   1250 /* lock the database. If we already have it locked then don't do anything */
   1251 int tdb_writelock(TDB_CONTEXT *tdb)
   1252 {
   1253         if (tdb == NULL) {
   1254 #ifdef TDB_DEBUG
   1255             printf("tdb_writelock() called with null context\n");
   1256 #endif
   1257             return -1;
   1258         }
   1259 
   1260 	return tdb_lock(tdb, -1);
   1261 }
   1262 
   1263 /* unlock the database. */
   1264 int tdb_writeunlock(TDB_CONTEXT *tdb)
   1265 {
   1266         if (tdb == NULL) {
   1267 #ifdef TDB_DEBUG
   1268             printf("tdb_writeunlock() called with null context\n");
   1269 #endif
   1270             return -1;
   1271         }
   1272 
   1273 	return tdb_unlock(tdb, -1);
   1274 }
   1275 
   1276 int tdb_chainlock(TDB_CONTEXT *tdb, TDB_DATA key)
   1277 {
   1278         if (tdb == NULL) {
   1279 #ifdef TDB_DEBUG
   1280             printf("tdb_lockchain() called with null context\n");
   1281 #endif
   1282             return -1;
   1283         }
   1284 
   1285 	return tdb_lock(tdb, BUCKET(tdb_hash(&key)));
   1286 }
   1287 
   1288 
   1289 int tdb_chainunlock(TDB_CONTEXT *tdb, TDB_DATA key)
   1290 {
   1291         if (tdb == NULL) {
   1292 #ifdef TDB_DEBUG
   1293             printf("tdb_unlockchain() called with null context\n");
   1294 #endif
   1295             return -1;
   1296         }
   1297 
   1298 	return tdb_unlock(tdb, BUCKET(tdb_hash(&key)));
   1299 }
   1300