Home | History | Annotate | Line # | Download | only in pax
tables.c revision 1.7
      1 /*	$NetBSD: tables.c,v 1.7 1997/07/20 20:32:45 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1992 Keith Muller.
      5  * Copyright (c) 1992, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  *
      8  * This code is derived from software contributed to Berkeley by
      9  * Keith Muller of the University of California, San Diego.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the University of
     22  *	California, Berkeley and its contributors.
     23  * 4. Neither the name of the University nor the names of its contributors
     24  *    may be used to endorse or promote products derived from this software
     25  *    without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37  * SUCH DAMAGE.
     38  */
     39 
     40 #include <sys/cdefs.h>
     41 #ifndef lint
     42 #if 0
     43 static char sccsid[] = "@(#)tables.c	8.1 (Berkeley) 5/31/93";
     44 #else
     45 __RCSID("$NetBSD: tables.c,v 1.7 1997/07/20 20:32:45 christos Exp $");
     46 #endif
     47 #endif /* not lint */
     48 
     49 #include <sys/types.h>
     50 #include <sys/time.h>
     51 #include <sys/stat.h>
     52 #include <sys/param.h>
     53 #include <sys/fcntl.h>
     54 #include <stdio.h>
     55 #include <ctype.h>
     56 #include <string.h>
     57 #include <unistd.h>
     58 #include <errno.h>
     59 #include <stdlib.h>
     60 #include "pax.h"
     61 #include "tables.h"
     62 #include "extern.h"
     63 
     64 /*
     65  * Routines for controlling the contents of all the different databases pax
     66  * keeps. Tables are dynamically created only when they are needed. The
     67  * goal was speed and the ability to work with HUGE archives. The databases
     68  * were kept simple, but do have complex rules for when the contents change.
     69  * As of this writing, the posix library functions were more complex than
     70  * needed for this application (pax databases have very short lifetimes and
     71  * do not survive after pax is finished). Pax is required to handle very
     72  * large archives. These database routines carefully combine memory usage and
     73  * temporary file storage in ways which will not significantly impact runtime
     74  * performance while allowing the largest possible archives to be handled.
     75  * Trying to force the fit to the posix databases routines was not considered
     76  * time well spent.
     77  */
     78 
     79 static HRDLNK **ltab = NULL;	/* hard link table for detecting hard links */
     80 static FTM **ftab = NULL;	/* file time table for updating arch */
     81 static NAMT **ntab = NULL;	/* interactive rename storage table */
     82 static DEVT **dtab = NULL;	/* device/inode mapping tables */
     83 static ATDIR **atab = NULL;	/* file tree directory time reset table */
     84 static int dirfd = -1;		/* storage for setting created dir time/mode */
     85 static u_long dircnt;		/* entries in dir time/mode storage */
     86 static int ffd = -1;		/* tmp file for file time table name storage */
     87 
     88 static DEVT *chk_dev __P((dev_t, int));
     89 
     90 /*
     91  * hard link table routines
     92  *
     93  * The hard link table tries to detect hard links to files using the device and
     94  * inode values. We do this when writing an archive, so we can tell the format
     95  * write routine that this file is a hard link to another file. The format
     96  * write routine then can store this file in whatever way it wants (as a hard
     97  * link if the format supports that like tar, or ignore this info like cpio).
     98  * (Actually a field in the format driver table tells us if the format wants
     99  * hard link info. if not, we do not waste time looking for them). We also use
    100  * the same table when reading an archive. In that situation, this table is
    101  * used by the format read routine to detect hard links from stored dev and
    102  * inode numbers (like cpio). This will allow pax to create a link when one
    103  * can be detected by the archive format.
    104  */
    105 
    106 /*
    107  * lnk_start
    108  *	Creates the hard link table.
    109  * Return:
    110  *	0 if created, -1 if failure
    111  */
    112 
    113 #if __STDC__
    114 int
    115 lnk_start(void)
    116 #else
    117 int
    118 lnk_start()
    119 #endif
    120 {
    121 	if (ltab != NULL)
    122 		return(0);
    123  	if ((ltab = (HRDLNK **)calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) {
    124                 tty_warn(1, "Cannot allocate memory for hard link table");
    125                 return(-1);
    126         }
    127 	return(0);
    128 }
    129 
    130 /*
    131  * chk_lnk()
    132  *	Looks up entry in hard link hash table. If found, it copies the name
    133  *	of the file it is linked to (we already saw that file) into ln_name.
    134  *	lnkcnt is decremented and if goes to 1 the node is deleted from the
    135  *	database. (We have seen all the links to this file). If not found,
    136  *	we add the file to the database if it has the potential for having
    137  *	hard links to other files we may process (it has a link count > 1)
    138  * Return:
    139  *	if found returns 1; if not found returns 0; -1 on error
    140  */
    141 
    142 #if __STDC__
    143 int
    144 chk_lnk(ARCHD *arcn)
    145 #else
    146 int
    147 chk_lnk(arcn)
    148 	ARCHD *arcn;
    149 #endif
    150 {
    151 	HRDLNK *pt;
    152 	HRDLNK **ppt;
    153 	u_int indx;
    154 
    155 	if (ltab == NULL)
    156 		return(-1);
    157 	/*
    158 	 * ignore those nodes that cannot have hard links
    159 	 */
    160 	if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1))
    161 		return(0);
    162 
    163 	/*
    164 	 * hash inode number and look for this file
    165 	 */
    166 	indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
    167 	if ((pt = ltab[indx]) != NULL) {
    168 		/*
    169 		 * it's hash chain in not empty, walk down looking for it
    170 		 */
    171 		ppt = &(ltab[indx]);
    172 		while (pt != NULL) {
    173 			if ((pt->ino == arcn->sb.st_ino) &&
    174 			    (pt->dev == arcn->sb.st_dev))
    175 				break;
    176 			ppt = &(pt->fow);
    177 			pt = pt->fow;
    178 		}
    179 
    180 		if (pt != NULL) {
    181 			/*
    182 			 * found a link. set the node type and copy in the
    183 			 * name of the file it is to link to. we need to
    184 			 * handle hardlinks to regular files differently than
    185 			 * other links.
    186 			 */
    187 			arcn->ln_nlen = l_strncpy(arcn->ln_name, pt->name,
    188 				PAXPATHLEN+1);
    189 			if (arcn->type == PAX_REG)
    190 				arcn->type = PAX_HRG;
    191 			else
    192 				arcn->type = PAX_HLK;
    193 
    194 			/*
    195 			 * if we have found all the links to this file, remove
    196 			 * it from the database
    197 			 */
    198 			if (--pt->nlink <= 1) {
    199 				*ppt = pt->fow;
    200 				(void)free((char *)pt->name);
    201 				(void)free((char *)pt);
    202 			}
    203 			return(1);
    204 		}
    205 	}
    206 
    207 	/*
    208 	 * we never saw this file before. It has links so we add it to the
    209 	 * front of this hash chain
    210 	 */
    211 	if ((pt = (HRDLNK *)malloc(sizeof(HRDLNK))) != NULL) {
    212 		if ((pt->name = strdup(arcn->name)) != NULL) {
    213 			pt->dev = arcn->sb.st_dev;
    214 			pt->ino = arcn->sb.st_ino;
    215 			pt->nlink = arcn->sb.st_nlink;
    216 			pt->fow = ltab[indx];
    217 			ltab[indx] = pt;
    218 			return(0);
    219 		}
    220 		(void)free((char *)pt);
    221 	}
    222 
    223 	tty_warn(1, "Hard link table out of memory");
    224 	return(-1);
    225 }
    226 
    227 /*
    228  * purg_lnk
    229  *	remove reference for a file that we may have added to the data base as
    230  *	a potential source for hard links. We ended up not using the file, so
    231  *	we do not want to accidently point another file at it later on.
    232  */
    233 
    234 #if __STDC__
    235 void
    236 purg_lnk(ARCHD *arcn)
    237 #else
    238 void
    239 purg_lnk(arcn)
    240 	ARCHD *arcn;
    241 #endif
    242 {
    243 	HRDLNK *pt;
    244 	HRDLNK **ppt;
    245 	u_int indx;
    246 
    247 	if (ltab == NULL)
    248 		return;
    249 	/*
    250 	 * do not bother to look if it could not be in the database
    251 	 */
    252 	if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) ||
    253 	    (arcn->type == PAX_HLK) || (arcn->type == PAX_HRG))
    254 		return;
    255 
    256 	/*
    257 	 * find the hash chain for this inode value, if empty return
    258 	 */
    259 	indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
    260 	if ((pt = ltab[indx]) == NULL)
    261 		return;
    262 
    263 	/*
    264 	 * walk down the list looking for the inode/dev pair, unlink and
    265 	 * free if found
    266 	 */
    267 	ppt = &(ltab[indx]);
    268 	while (pt != NULL) {
    269 		if ((pt->ino == arcn->sb.st_ino) &&
    270 		    (pt->dev == arcn->sb.st_dev))
    271 			break;
    272 		ppt = &(pt->fow);
    273 		pt = pt->fow;
    274 	}
    275 	if (pt == NULL)
    276 		return;
    277 
    278 	/*
    279 	 * remove and free it
    280 	 */
    281 	*ppt = pt->fow;
    282 	(void)free((char *)pt->name);
    283 	(void)free((char *)pt);
    284 }
    285 
    286 /*
    287  * lnk_end()
    288  *	pull apart a existing link table so we can reuse it. We do this between
    289  *	read and write phases of append with update. (The format may have
    290  *	used the link table, and we need to start with a fresh table for the
    291  *	write phase
    292  */
    293 
    294 #if __STDC__
    295 void
    296 lnk_end(void)
    297 #else
    298 void
    299 lnk_end()
    300 #endif
    301 {
    302 	int i;
    303 	HRDLNK *pt;
    304 	HRDLNK *ppt;
    305 
    306 	if (ltab == NULL)
    307 		return;
    308 
    309 	for (i = 0; i < L_TAB_SZ; ++i) {
    310 		if (ltab[i] == NULL)
    311 			continue;
    312 		pt = ltab[i];
    313 		ltab[i] = NULL;
    314 
    315 		/*
    316 		 * free up each entry on this chain
    317 		 */
    318 		while (pt != NULL) {
    319 			ppt = pt;
    320 			pt = ppt->fow;
    321 			(void)free((char *)ppt->name);
    322 			(void)free((char *)ppt);
    323 		}
    324 	}
    325 	return;
    326 }
    327 
    328 /*
    329  * modification time table routines
    330  *
    331  * The modification time table keeps track of last modification times for all
    332  * files stored in an archive during a write phase when -u is set. We only
    333  * add a file to the archive if it is newer than a file with the same name
    334  * already stored on the archive (if there is no other file with the same
    335  * name on the archive it is added). This applies to writes and appends.
    336  * An append with an -u must read the archive and store the modification time
    337  * for every file on that archive before starting the write phase. It is clear
    338  * that this is one HUGE database. To save memory space, the actual file names
    339  * are stored in a scatch file and indexed by an in memory hash table. The
    340  * hash table is indexed by hashing the file path. The nodes in the table store
    341  * the length of the filename and the lseek offset within the scratch file
    342  * where the actual name is stored. Since there are never any deletions to this
    343  * table, fragmentation of the scratch file is never a issue. Lookups seem to
    344  * not exhibit any locality at all (files in the database are rarely
    345  * looked up more than once...). So caching is just a waste of memory. The
    346  * only limitation is the amount of scatch file space available to store the
    347  * path names.
    348  */
    349 
    350 /*
    351  * ftime_start()
    352  *	create the file time hash table and open for read/write the scratch
    353  *	file. (after created it is unlinked, so when we exit we leave
    354  *	no witnesses).
    355  * Return:
    356  *	0 if the table and file was created ok, -1 otherwise
    357  */
    358 
    359 #if __STDC__
    360 int
    361 ftime_start(void)
    362 #else
    363 int
    364 ftime_start()
    365 #endif
    366 {
    367 	char *pt;
    368 
    369 	if (ftab != NULL)
    370 		return(0);
    371  	if ((ftab = (FTM **)calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) {
    372                 tty_warn(1, "Cannot allocate memory for file time table");
    373                 return(-1);
    374         }
    375 
    376 	/*
    377 	 * get random name and create temporary scratch file, unlink name
    378 	 * so it will get removed on exit
    379 	 */
    380 	pt = strdup("/tmp/paxXXXXXX");
    381 	if (pt == NULL) {
    382 		tty_warn(1, "Unable to allocate memory");
    383 		return(-1);
    384 	}
    385 	if ((ffd = mkstemp(pt)) == -1) {
    386 		syswarn(1, errno, "Unable to create temporary file: %s", pt);
    387 		free(pt);
    388 		return(-1);
    389 	}
    390 
    391 	(void)unlink(pt);
    392 	free(pt);
    393 	return(0);
    394 }
    395 
    396 /*
    397  * chk_ftime()
    398  *	looks up entry in file time hash table. If not found, the file is
    399  *	added to the hash table and the file named stored in the scratch file.
    400  *	If a file with the same name is found, the file times are compared and
    401  *	the most recent file time is retained. If the new file was younger (or
    402  *	was not in the database) the new file is selected for storage.
    403  * Return:
    404  *	0 if file should be added to the archive, 1 if it should be skipped,
    405  *	-1 on error
    406  */
    407 
    408 #if __STDC__
    409 int
    410 chk_ftime(ARCHD *arcn)
    411 #else
    412 int
    413 chk_ftime(arcn)
    414 	ARCHD *arcn;
    415 #endif
    416 {
    417 	FTM *pt;
    418 	int namelen;
    419 	u_int indx;
    420 	char ckname[PAXPATHLEN+1];
    421 
    422 	/*
    423 	 * no info, go ahead and add to archive
    424 	 */
    425 	if (ftab == NULL)
    426 		return(0);
    427 
    428 	/*
    429 	 * hash the pathname and look up in table
    430 	 */
    431 	namelen = arcn->nlen;
    432 	indx = st_hash(arcn->name, namelen, F_TAB_SZ);
    433 	if ((pt = ftab[indx]) != NULL) {
    434 		/*
    435 		 * the hash chain is not empty, walk down looking for match
    436 		 * only read up the path names if the lengths match, speeds
    437 		 * up the search a lot
    438 		 */
    439 		while (pt != NULL) {
    440 			if (pt->namelen == namelen) {
    441 				/*
    442 				 * potential match, have to read the name
    443 				 * from the scratch file.
    444 				 */
    445 				if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) {
    446 					syswarn(1, errno,
    447 					    "Failed ftime table seek");
    448 					return(-1);
    449 				}
    450 				if (read(ffd, ckname, namelen) != namelen) {
    451 					syswarn(1, errno,
    452 					    "Failed ftime table read");
    453 					return(-1);
    454 				}
    455 
    456 				/*
    457 				 * if the names match, we are done
    458 				 */
    459 				if (!strncmp(ckname, arcn->name, namelen))
    460 					break;
    461 			}
    462 
    463 			/*
    464 			 * try the next entry on the chain
    465 			 */
    466 			pt = pt->fow;
    467 		}
    468 
    469 		if (pt != NULL) {
    470 			/*
    471 			 * found the file, compare the times, save the newer
    472 			 */
    473 			if (arcn->sb.st_mtime > pt->mtime) {
    474 				/*
    475 				 * file is newer
    476 				 */
    477 				pt->mtime = arcn->sb.st_mtime;
    478 				return(0);
    479 			}
    480 			/*
    481 			 * file is older
    482 			 */
    483 			return(1);
    484 		}
    485 	}
    486 
    487 	/*
    488 	 * not in table, add it
    489 	 */
    490 	if ((pt = (FTM *)malloc(sizeof(FTM))) != NULL) {
    491 		/*
    492 		 * add the name at the end of the scratch file, saving the
    493 		 * offset. add the file to the head of the hash chain
    494 		 */
    495 		if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) {
    496 			if (write(ffd, arcn->name, namelen) == namelen) {
    497 				pt->mtime = arcn->sb.st_mtime;
    498 				pt->namelen = namelen;
    499 				pt->fow = ftab[indx];
    500 				ftab[indx] = pt;
    501 				return(0);
    502 			}
    503 			syswarn(1, errno, "Failed write to file time table");
    504 		} else
    505 			syswarn(1, errno, "Failed seek on file time table");
    506 	} else
    507 		tty_warn(1, "File time table ran out of memory");
    508 
    509 	if (pt != NULL)
    510 		(void)free((char *)pt);
    511 	return(-1);
    512 }
    513 
    514 /*
    515  * Interactive rename table routines
    516  *
    517  * The interactive rename table keeps track of the new names that the user
    518  * assignes to files from tty input. Since this map is unique for each file
    519  * we must store it in case there is a reference to the file later in archive
    520  * (a link). Otherwise we will be unable to find the file we know was
    521  * extracted. The remapping of these files is stored in a memory based hash
    522  * table (it is assumed since input must come from /dev/tty, it is unlikely to
    523  * be a very large table).
    524  */
    525 
    526 /*
    527  * name_start()
    528  *	create the interactive rename table
    529  * Return:
    530  *	0 if successful, -1 otherwise
    531  */
    532 
    533 #if __STDC__
    534 int
    535 name_start(void)
    536 #else
    537 int
    538 name_start()
    539 #endif
    540 {
    541 	if (ntab != NULL)
    542 		return(0);
    543  	if ((ntab = (NAMT **)calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) {
    544                 tty_warn(1,
    545 		    "Cannot allocate memory for interactive rename table");
    546                 return(-1);
    547         }
    548 	return(0);
    549 }
    550 
    551 /*
    552  * add_name()
    553  *	add the new name to old name mapping just created by the user.
    554  *	If an old name mapping is found (there may be duplicate names on an
    555  *	archive) only the most recent is kept.
    556  * Return:
    557  *	0 if added, -1 otherwise
    558  */
    559 
    560 #if __STDC__
    561 int
    562 add_name(char *oname, int onamelen, char *nname)
    563 #else
    564 int
    565 add_name(oname, onamelen, nname)
    566 	char *oname;
    567 	int onamelen;
    568 	char *nname;
    569 #endif
    570 {
    571 	NAMT *pt;
    572 	u_int indx;
    573 
    574 	if (ntab == NULL) {
    575 		/*
    576 		 * should never happen
    577 		 */
    578 		tty_warn(0, "No interactive rename table, links may fail\n");
    579 		return(0);
    580 	}
    581 
    582 	/*
    583 	 * look to see if we have already mapped this file, if so we
    584 	 * will update it
    585 	 */
    586 	indx = st_hash(oname, onamelen, N_TAB_SZ);
    587 	if ((pt = ntab[indx]) != NULL) {
    588 		/*
    589 		 * look down the has chain for the file
    590 		 */
    591 		while ((pt != NULL) && (strcmp(oname, pt->oname) != 0))
    592 			pt = pt->fow;
    593 
    594 		if (pt != NULL) {
    595 			/*
    596 			 * found an old mapping, replace it with the new one
    597 			 * the user just input (if it is different)
    598 			 */
    599 			if (strcmp(nname, pt->nname) == 0)
    600 				return(0);
    601 
    602 			(void)free((char *)pt->nname);
    603 			if ((pt->nname = strdup(nname)) == NULL) {
    604 				tty_warn(1, "Cannot update rename table");
    605 				return(-1);
    606 			}
    607 			return(0);
    608 		}
    609 	}
    610 
    611 	/*
    612 	 * this is a new mapping, add it to the table
    613 	 */
    614 	if ((pt = (NAMT *)malloc(sizeof(NAMT))) != NULL) {
    615 		if ((pt->oname = strdup(oname)) != NULL) {
    616 			if ((pt->nname = strdup(nname)) != NULL) {
    617 				pt->fow = ntab[indx];
    618 				ntab[indx] = pt;
    619 				return(0);
    620 			}
    621 			(void)free((char *)pt->oname);
    622 		}
    623 		(void)free((char *)pt);
    624 	}
    625 	tty_warn(1, "Interactive rename table out of memory");
    626 	return(-1);
    627 }
    628 
    629 /*
    630  * sub_name()
    631  *	look up a link name to see if it points at a file that has been
    632  *	remapped by the user. If found, the link is adjusted to contain the
    633  *	new name (oname is the link to name)
    634  */
    635 
    636 #if __STDC__
    637 void
    638 sub_name(char *oname, int *onamelen)
    639 #else
    640 void
    641 sub_name(oname, onamelen)
    642 	char *oname;
    643 	int *onamelen;
    644 #endif
    645 {
    646 	NAMT *pt;
    647 	u_int indx;
    648 
    649 	if (ntab == NULL)
    650 		return;
    651 	/*
    652 	 * look the name up in the hash table
    653 	 */
    654 	indx = st_hash(oname, *onamelen, N_TAB_SZ);
    655 	if ((pt = ntab[indx]) == NULL)
    656 		return;
    657 
    658 	while (pt != NULL) {
    659 		/*
    660 		 * walk down the hash cahin looking for a match
    661 		 */
    662 		if (strcmp(oname, pt->oname) == 0) {
    663 			/*
    664 			 * found it, replace it with the new name
    665 			 * and return (we know that oname has enough space)
    666 			 */
    667 			*onamelen = l_strncpy(oname, pt->nname, PAXPATHLEN+1);
    668 			return;
    669 		}
    670 		pt = pt->fow;
    671 	}
    672 
    673 	/*
    674 	 * no match, just return
    675 	 */
    676 	return;
    677 }
    678 
    679 /*
    680  * device/inode mapping table routines
    681  * (used with formats that store device and inodes fields)
    682  *
    683  * device/inode mapping tables remap the device field in a archive header. The
    684  * device/inode fields are used to determine when files are hard links to each
    685  * other. However these values have very little meaning outside of that. This
    686  * database is used to solve one of two different problems.
    687  *
    688  * 1) when files are appended to an archive, while the new files may have hard
    689  * links to each other, you cannot determine if they have hard links to any
    690  * file already stored on the archive from a prior run of pax. We must assume
    691  * that these inode/device pairs are unique only within a SINGLE run of pax
    692  * (which adds a set of files to an archive). So we have to make sure the
    693  * inode/dev pairs we add each time are always unique. We do this by observing
    694  * while the inode field is very dense, the use of the dev field is fairly
    695  * sparse. Within each run of pax, we remap any device number of a new archive
    696  * member that has a device number used in a prior run and already stored in a
    697  * file on the archive. During the read phase of the append, we store the
    698  * device numbers used and mark them to not be used by any file during the
    699  * write phase. If during write we go to use one of those old device numbers,
    700  * we remap it to a new value.
    701  *
    702  * 2) Often the fields in the archive header used to store these values are
    703  * too small to store the entire value. The result is an inode or device value
    704  * which can be truncated. This really can foul up an archive. With truncation
    705  * we end up creating links between files that are really not links (after
    706  * truncation the inodes are the same value). We address that by detecting
    707  * truncation and forcing a remap of the device field to split truncated
    708  * inodes away from each other. Each truncation creates a pattern of bits that
    709  * are removed. We use this pattern of truncated bits to partition the inodes
    710  * on a single device to many different devices (each one represented by the
    711  * truncated bit pattern). All inodes on the same device that have the same
    712  * truncation pattern are mapped to the same new device. Two inodes that
    713  * truncate to the same value clearly will always have different truncation
    714  * bit patterns, so they will be split from away each other. When we spot
    715  * device truncation we remap the device number to a non truncated value.
    716  * (for more info see table.h for the data structures involved).
    717  */
    718 
    719 /*
    720  * dev_start()
    721  *	create the device mapping table
    722  * Return:
    723  *	0 if successful, -1 otherwise
    724  */
    725 
    726 #if __STDC__
    727 int
    728 dev_start(void)
    729 #else
    730 int
    731 dev_start()
    732 #endif
    733 {
    734 	if (dtab != NULL)
    735 		return(0);
    736  	if ((dtab = (DEVT **)calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) {
    737                 tty_warn(1, "Cannot allocate memory for device mapping table");
    738                 return(-1);
    739         }
    740 	return(0);
    741 }
    742 
    743 /*
    744  * add_dev()
    745  *	add a device number to the table. this will force the device to be
    746  *	remapped to a new value if it be used during a write phase. This
    747  *	function is called during the read phase of an append to prohibit the
    748  *	use of any device number already in the archive.
    749  * Return:
    750  *	0 if added ok, -1 otherwise
    751  */
    752 
    753 #if __STDC__
    754 int
    755 add_dev(ARCHD *arcn)
    756 #else
    757 int
    758 add_dev(arcn)
    759 	ARCHD *arcn;
    760 #endif
    761 {
    762 	if (chk_dev(arcn->sb.st_dev, 1) == NULL)
    763 		return(-1);
    764 	return(0);
    765 }
    766 
    767 /*
    768  * chk_dev()
    769  *	check for a device value in the device table. If not found and the add
    770  *	flag is set, it is added. This does NOT assign any mapping values, just
    771  *	adds the device number as one that need to be remapped. If this device
    772  *	is alread mapped, just return with a pointer to that entry.
    773  * Return:
    774  *	pointer to the entry for this device in the device map table. Null
    775  *	if the add flag is not set and the device is not in the table (it is
    776  *	not been seen yet). If add is set and the device cannot be added, null
    777  *	is returned (indicates an error).
    778  */
    779 
    780 #if __STDC__
    781 static DEVT *
    782 chk_dev(dev_t dev, int add)
    783 #else
    784 static DEVT *
    785 chk_dev(dev, add)
    786 	dev_t dev;
    787 	int add;
    788 #endif
    789 {
    790 	DEVT *pt;
    791 	u_int indx;
    792 
    793 	if (dtab == NULL)
    794 		return(NULL);
    795 	/*
    796 	 * look to see if this device is already in the table
    797 	 */
    798 	indx = ((unsigned)dev) % D_TAB_SZ;
    799 	if ((pt = dtab[indx]) != NULL) {
    800 		while ((pt != NULL) && (pt->dev != dev))
    801 			pt = pt->fow;
    802 
    803 		/*
    804 		 * found it, return a pointer to it
    805 		 */
    806 		if (pt != NULL)
    807 			return(pt);
    808 	}
    809 
    810 	/*
    811 	 * not in table, we add it only if told to as this may just be a check
    812 	 * to see if a device number is being used.
    813 	 */
    814 	if (add == 0)
    815 		return(NULL);
    816 
    817 	/*
    818 	 * allocate a node for this device and add it to the front of the hash
    819 	 * chain. Note we do not assign remaps values here, so the pt->list
    820 	 * list must be NULL.
    821 	 */
    822 	if ((pt = (DEVT *)malloc(sizeof(DEVT))) == NULL) {
    823 		tty_warn(1, "Device map table out of memory");
    824 		return(NULL);
    825 	}
    826 	pt->dev = dev;
    827 	pt->list = NULL;
    828 	pt->fow = dtab[indx];
    829 	dtab[indx] = pt;
    830 	return(pt);
    831 }
    832 /*
    833  * map_dev()
    834  *	given an inode and device storage mask (the mask has a 1 for each bit
    835  *	the archive format is able to store in a header), we check for inode
    836  *	and device truncation and remap the device as required. Device mapping
    837  *	can also occur when during the read phase of append a device number was
    838  *	seen (and was marked as do not use during the write phase). WE ASSUME
    839  *	that unsigned longs are the same size or bigger than the fields used
    840  *	for ino_t and dev_t. If not the types will have to be changed.
    841  * Return:
    842  *	0 if all ok, -1 otherwise.
    843  */
    844 
    845 #if __STDC__
    846 int
    847 map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask)
    848 #else
    849 int
    850 map_dev(arcn, dev_mask, ino_mask)
    851 	ARCHD *arcn;
    852 	u_long dev_mask;
    853 	u_long ino_mask;
    854 #endif
    855 {
    856 	DEVT *pt;
    857 	DLIST *dpt;
    858 	static dev_t lastdev = 0;	/* next device number to try */
    859 	int trc_ino = 0;
    860 	int trc_dev = 0;
    861 	ino_t trunc_bits = 0;
    862 	ino_t nino;
    863 
    864 	if (dtab == NULL)
    865 		return(0);
    866 	/*
    867 	 * check for device and inode truncation, and extract the truncated
    868 	 * bit pattern.
    869 	 */
    870 	if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev)
    871 		++trc_dev;
    872 	if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) {
    873 		++trc_ino;
    874 		trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask);
    875 	}
    876 
    877 	/*
    878 	 * see if this device is already being mapped, look up the device
    879 	 * then find the truncation bit pattern which applies
    880 	 */
    881 	if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) {
    882 		/*
    883 		 * this device is already marked to be remapped
    884 		 */
    885 		for (dpt = pt->list; dpt != NULL; dpt = dpt->fow)
    886 			if (dpt->trunc_bits == trunc_bits)
    887 				break;
    888 
    889 		if (dpt != NULL) {
    890 			/*
    891 			 * we are being remapped for this device and pattern
    892 			 * change the device number to be stored and return
    893 			 */
    894 			arcn->sb.st_dev = dpt->dev;
    895 			arcn->sb.st_ino = nino;
    896 			return(0);
    897 		}
    898 	} else {
    899 		/*
    900 		 * this device is not being remapped YET. if we do not have any
    901 		 * form of truncation, we do not need a remap
    902 		 */
    903 		if (!trc_ino && !trc_dev)
    904 			return(0);
    905 
    906 		/*
    907 		 * we have truncation, have to add this as a device to remap
    908 		 */
    909 		if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL)
    910 			goto bad;
    911 
    912 		/*
    913 		 * if we just have a truncated inode, we have to make sure that
    914 		 * all future inodes that do not truncate (they have the
    915 		 * truncation pattern of all 0's) continue to map to the same
    916 		 * device number. We probably have already written inodes with
    917 		 * this device number to the archive with the truncation
    918 		 * pattern of all 0's. So we add the mapping for all 0's to the
    919 		 * same device number.
    920 		 */
    921 		if (!trc_dev && (trunc_bits != 0)) {
    922 			if ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL)
    923 				goto bad;
    924 			dpt->trunc_bits = 0;
    925 			dpt->dev = arcn->sb.st_dev;
    926 			dpt->fow = pt->list;
    927 			pt->list = dpt;
    928 		}
    929 	}
    930 
    931 	/*
    932 	 * look for a device number not being used. We must watch for wrap
    933 	 * around on lastdev (so we do not get stuck looking forever!)
    934 	 */
    935 	while (++lastdev > 0) {
    936 		if (chk_dev(lastdev, 0) != NULL)
    937 			continue;
    938 		/*
    939 		 * found an unused value. If we have reached truncation point
    940 		 * for this format we are hosed, so we give up. Otherwise we
    941 		 * mark it as being used.
    942 		 */
    943 		if (((lastdev & ((dev_t)dev_mask)) != lastdev) ||
    944 		    (chk_dev(lastdev, 1) == NULL))
    945 			goto bad;
    946 		break;
    947 	}
    948 
    949 	if ((lastdev <= 0) || ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL))
    950 		goto bad;
    951 
    952 	/*
    953 	 * got a new device number, store it under this truncation pattern.
    954 	 * change the device number this file is being stored with.
    955 	 */
    956 	dpt->trunc_bits = trunc_bits;
    957 	dpt->dev = lastdev;
    958 	dpt->fow = pt->list;
    959 	pt->list = dpt;
    960 	arcn->sb.st_dev = lastdev;
    961 	arcn->sb.st_ino = nino;
    962 	return(0);
    963 
    964     bad:
    965 	tty_warn(1,
    966 	    "Unable to fix truncated inode/device field when storing %s",
    967 	    arcn->name);
    968 	tty_warn(0, "Archive may create improper hard links when extracted");
    969 	return(0);
    970 }
    971 
    972 /*
    973  * directory access/mod time reset table routines (for directories READ by pax)
    974  *
    975  * The pax -t flag requires that access times of archive files to be the same
    976  * before being read by pax. For regular files, access time is restored after
    977  * the file has been copied. This database provides the same functionality for
    978  * directories read during file tree traversal. Restoring directory access time
    979  * is more complex than files since directories may be read several times until
    980  * all the descendants in their subtree are visited by fts. Directory access
    981  * and modification times are stored during the fts pre-order visit (done
    982  * before any descendants in the subtree is visited) and restored after the
    983  * fts post-order visit (after all the descendants have been visited). In the
    984  * case of premature exit from a subtree (like from the effects of -n), any
    985  * directory entries left in this database are reset during final cleanup
    986  * operations of pax. Entries are hashed by inode number for fast lookup.
    987  */
    988 
    989 /*
    990  * atdir_start()
    991  *	create the directory access time database for directories READ by pax.
    992  * Return:
    993  *	0 is created ok, -1 otherwise.
    994  */
    995 
    996 #if __STDC__
    997 int
    998 atdir_start(void)
    999 #else
   1000 int
   1001 atdir_start()
   1002 #endif
   1003 {
   1004 	if (atab != NULL)
   1005 		return(0);
   1006  	if ((atab = (ATDIR **)calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) {
   1007                 tty_warn(1,
   1008 		    "Cannot allocate space for directory access time table");
   1009                 return(-1);
   1010         }
   1011 	return(0);
   1012 }
   1013 
   1014 
   1015 /*
   1016  * atdir_end()
   1017  *	walk through the directory access time table and reset the access time
   1018  *	of any directory who still has an entry left in the database. These
   1019  *	entries are for directories READ by pax
   1020  */
   1021 
   1022 #if __STDC__
   1023 void
   1024 atdir_end(void)
   1025 #else
   1026 void
   1027 atdir_end()
   1028 #endif
   1029 {
   1030 	ATDIR *pt;
   1031 	int i;
   1032 
   1033 	if (atab == NULL)
   1034 		return;
   1035 	/*
   1036 	 * for each non-empty hash table entry reset all the directories
   1037 	 * chained there.
   1038 	 */
   1039 	for (i = 0; i < A_TAB_SZ; ++i) {
   1040 		if ((pt = atab[i]) == NULL)
   1041 			continue;
   1042 		/*
   1043 		 * remember to force the times, set_ftime() looks at pmtime
   1044 		 * and patime, which only applies to things CREATED by pax,
   1045 		 * not read by pax. Read time reset is controlled by -t.
   1046 		 */
   1047 		for (; pt != NULL; pt = pt->fow)
   1048 			set_ftime(pt->name, pt->mtime, pt->atime, 1);
   1049 	}
   1050 }
   1051 
   1052 /*
   1053  * add_atdir()
   1054  *	add a directory to the directory access time table. Table is hashed
   1055  *	and chained by inode number. This is for directories READ by pax
   1056  */
   1057 
   1058 #if __STDC__
   1059 void
   1060 add_atdir(char *fname, dev_t dev, ino_t ino, time_t mtime, time_t atime)
   1061 #else
   1062 void
   1063 add_atdir(fname, dev, ino, mtime, atime)
   1064 	char *fname;
   1065 	dev_t dev;
   1066 	ino_t ino;
   1067 	time_t mtime;
   1068 	time_t atime;
   1069 #endif
   1070 {
   1071 	ATDIR *pt;
   1072 	u_int indx;
   1073 
   1074 	if (atab == NULL)
   1075 		return;
   1076 
   1077 	/*
   1078 	 * make sure this directory is not already in the table, if so just
   1079 	 * return (the older entry always has the correct time). The only
   1080 	 * way this will happen is when the same subtree can be traversed by
   1081 	 * different args to pax and the -n option is aborting fts out of a
   1082 	 * subtree before all the post-order visits have been made).
   1083 	 */
   1084 	indx = ((unsigned)ino) % A_TAB_SZ;
   1085 	if ((pt = atab[indx]) != NULL) {
   1086 		while (pt != NULL) {
   1087 			if ((pt->ino == ino) && (pt->dev == dev))
   1088 				break;
   1089 			pt = pt->fow;
   1090 		}
   1091 
   1092 		/*
   1093 		 * oops, already there. Leave it alone.
   1094 		 */
   1095 		if (pt != NULL)
   1096 			return;
   1097 	}
   1098 
   1099 	/*
   1100 	 * add it to the front of the hash chain
   1101 	 */
   1102 	if ((pt = (ATDIR *)malloc(sizeof(ATDIR))) != NULL) {
   1103 		if ((pt->name = strdup(fname)) != NULL) {
   1104 			pt->dev = dev;
   1105 			pt->ino = ino;
   1106 			pt->mtime = mtime;
   1107 			pt->atime = atime;
   1108 			pt->fow = atab[indx];
   1109 			atab[indx] = pt;
   1110 			return;
   1111 		}
   1112 		(void)free((char *)pt);
   1113 	}
   1114 
   1115 	tty_warn(1, "Directory access time reset table ran out of memory");
   1116 	return;
   1117 }
   1118 
   1119 /*
   1120  * get_atdir()
   1121  *	look up a directory by inode and device number to obtain the access
   1122  *	and modification time you want to set to. If found, the modification
   1123  *	and access time parameters are set and the entry is removed from the
   1124  *	table (as it is no longer needed). These are for directories READ by
   1125  *	pax
   1126  * Return:
   1127  *	0 if found, -1 if not found.
   1128  */
   1129 
   1130 #if __STDC__
   1131 int
   1132 get_atdir(dev_t dev, ino_t ino, time_t *mtime, time_t *atime)
   1133 #else
   1134 int
   1135 get_atdir(dev, ino, mtime, atime)
   1136 	dev_t dev;
   1137 	ino_t ino;
   1138 	time_t *mtime;
   1139 	time_t *atime;
   1140 #endif
   1141 {
   1142 	ATDIR *pt;
   1143 	ATDIR **ppt;
   1144 	u_int indx;
   1145 
   1146 	if (atab == NULL)
   1147 		return(-1);
   1148 	/*
   1149 	 * hash by inode and search the chain for an inode and device match
   1150 	 */
   1151 	indx = ((unsigned)ino) % A_TAB_SZ;
   1152 	if ((pt = atab[indx]) == NULL)
   1153 		return(-1);
   1154 
   1155 	ppt = &(atab[indx]);
   1156 	while (pt != NULL) {
   1157 		if ((pt->ino == ino) && (pt->dev == dev))
   1158 			break;
   1159 		/*
   1160 		 * no match, go to next one
   1161 		 */
   1162 		ppt = &(pt->fow);
   1163 		pt = pt->fow;
   1164 	}
   1165 
   1166 	/*
   1167 	 * return if we did not find it.
   1168 	 */
   1169 	if (pt == NULL)
   1170 		return(-1);
   1171 
   1172 	/*
   1173 	 * found it. return the times and remove the entry from the table.
   1174 	 */
   1175 	*ppt = pt->fow;
   1176 	*mtime = pt->mtime;
   1177 	*atime = pt->atime;
   1178 	(void)free((char *)pt->name);
   1179 	(void)free((char *)pt);
   1180 	return(0);
   1181 }
   1182 
   1183 /*
   1184  * directory access mode and time storage routines (for directories CREATED
   1185  * by pax).
   1186  *
   1187  * Pax requires that extracted directories, by default, have their access/mod
   1188  * times and permissions set to the values specified in the archive. During the
   1189  * actions of extracting (and creating the destination subtree during -rw copy)
   1190  * directories extracted may be modified after being created. Even worse is
   1191  * that these directories may have been created with file permissions which
   1192  * prohibits any descendants of these directories from being extracted. When
   1193  * directories are created by pax, access rights may be added to permit the
   1194  * creation of files in their subtree. Every time pax creates a directory, the
   1195  * times and file permissions specified by the archive are stored. After all
   1196  * files have been extracted (or copied), these directories have their times
   1197  * and file modes reset to the stored values. The directory info is restored in
   1198  * reverse order as entries were added to the data file from root to leaf. To
   1199  * restore atime properly, we must go backwards. The data file consists of
   1200  * records with two parts, the file name followed by a DIRDATA trailer. The
   1201  * fixed sized trailer contains the size of the name plus the off_t location in
   1202  * the file. To restore we work backwards through the file reading the trailer
   1203  * then the file name.
   1204  */
   1205 
   1206 /*
   1207  * dir_start()
   1208  *	set up the directory time and file mode storage for directories CREATED
   1209  *	by pax.
   1210  * Return:
   1211  *	0 if ok, -1 otherwise
   1212  */
   1213 
   1214 #if __STDC__
   1215 int
   1216 dir_start(void)
   1217 #else
   1218 int
   1219 dir_start()
   1220 #endif
   1221 {
   1222 	char *pt;
   1223 
   1224 	if (dirfd != -1)
   1225 		return(0);
   1226 
   1227 	/*
   1228 	 * unlink the file so it goes away at termination by itself
   1229 	 */
   1230 	pt = strdup("/tmp/paxXXXXXX");
   1231 	if (pt == NULL) {
   1232 		tty_warn(1, "Unable to allocate memory");
   1233 		return(-1);
   1234 	}
   1235 	if ((dirfd = mkstemp(pt)) >= 0) {
   1236 		(void)unlink(pt);
   1237 		free(pt);
   1238 		return(0);
   1239 	}
   1240 	tty_warn(1, "Unable to create temporary file for directory times: %s",
   1241 	    pt);
   1242 	free(pt);
   1243 	return(-1);
   1244 }
   1245 
   1246 /*
   1247  * add_dir()
   1248  *	add the mode and times for a newly CREATED directory
   1249  *	name is name of the directory, psb the stat buffer with the data in it,
   1250  *	frc_mode is a flag that says whether to force the setting of the mode
   1251  *	(ignoring the user set values for preserving file mode). Frc_mode is
   1252  *	for the case where we created a file and found that the resulting
   1253  *	directory was not writeable and the user asked for file modes to NOT
   1254  *	be preserved. (we have to preserve what was created by default, so we
   1255  *	have to force the setting at the end. this is stated explicitly in the
   1256  *	pax spec)
   1257  */
   1258 
   1259 #if __STDC__
   1260 void
   1261 add_dir(char *name, int nlen, struct stat *psb, int frc_mode)
   1262 #else
   1263 void
   1264 add_dir(name, nlen, psb, frc_mode)
   1265 	char *name;
   1266 	int nlen;
   1267 	struct stat *psb;
   1268 	int frc_mode;
   1269 #endif
   1270 {
   1271 	DIRDATA dblk;
   1272 
   1273 	if (dirfd < 0)
   1274 		return;
   1275 
   1276 	/*
   1277 	 * get current position (where file name will start) so we can store it
   1278 	 * in the trailer
   1279 	 */
   1280 	if ((dblk.npos = lseek(dirfd, 0L, SEEK_CUR)) < 0) {
   1281 		tty_warn(1,
   1282 		    "Unable to store mode and times for directory: %s",name);
   1283 		return;
   1284 	}
   1285 
   1286 	/*
   1287 	 * write the file name followed by the trailer
   1288 	 */
   1289 	dblk.nlen = nlen + 1;
   1290 	dblk.mode = psb->st_mode & 0xffff;
   1291 	dblk.mtime = psb->st_mtime;
   1292 	dblk.atime = psb->st_atime;
   1293 	dblk.frc_mode = frc_mode;
   1294 	if ((write(dirfd, name, dblk.nlen) == dblk.nlen) &&
   1295 	    (write(dirfd, (char *)&dblk, sizeof(dblk)) == sizeof(dblk))) {
   1296 		++dircnt;
   1297 		return;
   1298 	}
   1299 
   1300 	tty_warn(1,
   1301 	    "Unable to store mode and times for created directory: %s",name);
   1302 	return;
   1303 }
   1304 
   1305 /*
   1306  * proc_dir()
   1307  *	process all file modes and times stored for directories CREATED
   1308  *	by pax
   1309  */
   1310 
   1311 #if __STDC__
   1312 void
   1313 proc_dir(void)
   1314 #else
   1315 void
   1316 proc_dir()
   1317 #endif
   1318 {
   1319 	char name[PAXPATHLEN+1];
   1320 	DIRDATA dblk;
   1321 	u_long cnt;
   1322 
   1323 	if (dirfd < 0)
   1324 		return;
   1325 	/*
   1326 	 * read backwards through the file and process each directory
   1327 	 */
   1328 	for (cnt = 0; cnt < dircnt; ++cnt) {
   1329 		/*
   1330 		 * read the trailer, then the file name, if this fails
   1331 		 * just give up.
   1332 		 */
   1333 		if (lseek(dirfd, -((off_t)sizeof(dblk)), SEEK_CUR) < 0)
   1334 			break;
   1335 		if (read(dirfd,(char *)&dblk, sizeof(dblk)) != sizeof(dblk))
   1336 			break;
   1337 		if (lseek(dirfd, dblk.npos, SEEK_SET) < 0)
   1338 			break;
   1339 		if (read(dirfd, name, dblk.nlen) != dblk.nlen)
   1340 			break;
   1341 		if (lseek(dirfd, dblk.npos, SEEK_SET) < 0)
   1342 			break;
   1343 
   1344 		/*
   1345 		 * frc_mode set, make sure we set the file modes even if
   1346 		 * the user didn't ask for it (see file_subs.c for more info)
   1347 		 */
   1348 		if (pmode || dblk.frc_mode)
   1349 			set_pmode(name, dblk.mode);
   1350 		if (patime || pmtime)
   1351 			set_ftime(name, dblk.mtime, dblk.atime, 0);
   1352 	}
   1353 
   1354 	(void)close(dirfd);
   1355 	dirfd = -1;
   1356 	if (cnt != dircnt)
   1357 		tty_warn(1,
   1358 		    "Unable to set mode and times for created directories");
   1359 	return;
   1360 }
   1361 
   1362 /*
   1363  * database independent routines
   1364  */
   1365 
   1366 /*
   1367  * st_hash()
   1368  *	hashes filenames to a u_int for hashing into a table. Looks at the tail
   1369  *	end of file, as this provides far better distribution than any other
   1370  *	part of the name. For performance reasons we only care about the last
   1371  *	MAXKEYLEN chars (should be at LEAST large enough to pick off the file
   1372  *	name). Was tested on 500,000 name file tree traversal from the root
   1373  *	and gave almost a perfectly uniform distribution of keys when used with
   1374  *	prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int)
   1375  *	chars at a time and pads with 0 for last addition.
   1376  * Return:
   1377  *	the hash value of the string MOD (%) the table size.
   1378  */
   1379 
   1380 #if __STDC__
   1381 u_int
   1382 st_hash(char *name, int len, int tabsz)
   1383 #else
   1384 u_int
   1385 st_hash(name, len, tabsz)
   1386 	char *name;
   1387 	int len;
   1388 	int tabsz;
   1389 #endif
   1390 {
   1391 	char *pt;
   1392 	char *dest;
   1393 	char *end;
   1394 	int i;
   1395 	u_int key = 0;
   1396 	int steps;
   1397 	int res;
   1398 	u_int val;
   1399 
   1400 	/*
   1401 	 * only look at the tail up to MAXKEYLEN, we do not need to waste
   1402 	 * time here (remember these are pathnames, the tail is what will
   1403 	 * spread out the keys)
   1404 	 */
   1405 	if (len > MAXKEYLEN) {
   1406                 pt = &(name[len - MAXKEYLEN]);
   1407 		len = MAXKEYLEN;
   1408 	} else
   1409 		pt = name;
   1410 
   1411 	/*
   1412 	 * calculate the number of u_int size steps in the string and if
   1413 	 * there is a runt to deal with
   1414 	 */
   1415 	steps = len/sizeof(u_int);
   1416 	res = len % sizeof(u_int);
   1417 
   1418 	/*
   1419 	 * add up the value of the string in unsigned integer sized pieces
   1420 	 * too bad we cannot have unsigned int aligned strings, then we
   1421 	 * could avoid the expensive copy.
   1422 	 */
   1423 	for (i = 0; i < steps; ++i) {
   1424 		end = pt + sizeof(u_int);
   1425 		dest = (char *)&val;
   1426 		while (pt < end)
   1427 			*dest++ = *pt++;
   1428 		key += val;
   1429 	}
   1430 
   1431 	/*
   1432 	 * add in the runt padded with zero to the right
   1433 	 */
   1434 	if (res) {
   1435 		val = 0;
   1436 		end = pt + res;
   1437 		dest = (char *)&val;
   1438 		while (pt < end)
   1439 			*dest++ = *pt++;
   1440 		key += val;
   1441 	}
   1442 
   1443 	/*
   1444 	 * return the result mod the table size
   1445 	 */
   1446 	return(key % tabsz);
   1447 }
   1448