tables.c revision 1.22 1 /* $NetBSD: tables.c,v 1.22 2003/10/13 07:41:22 agc Exp $ */
2
3 /*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <sys/cdefs.h>
37 #if defined(__RCSID) && !defined(lint)
38 #if 0
39 static char sccsid[] = "@(#)tables.c 8.1 (Berkeley) 5/31/93";
40 #else
41 __RCSID("$NetBSD: tables.c,v 1.22 2003/10/13 07:41:22 agc Exp $");
42 #endif
43 #endif /* not lint */
44
45 #include <sys/types.h>
46 #include <sys/time.h>
47 #include <sys/stat.h>
48 #include <sys/param.h>
49 #include <stdio.h>
50 #include <ctype.h>
51 #include <fcntl.h>
52 #include <paths.h>
53 #include <string.h>
54 #include <unistd.h>
55 #include <errno.h>
56 #include <stdlib.h>
57 #include "pax.h"
58 #include "tables.h"
59 #include "extern.h"
60
61 /*
62 * Routines for controlling the contents of all the different databases pax
63 * keeps. Tables are dynamically created only when they are needed. The
64 * goal was speed and the ability to work with HUGE archives. The databases
65 * were kept simple, but do have complex rules for when the contents change.
66 * As of this writing, the posix library functions were more complex than
67 * needed for this application (pax databases have very short lifetimes and
68 * do not survive after pax is finished). Pax is required to handle very
69 * large archives. These database routines carefully combine memory usage and
70 * temporary file storage in ways which will not significantly impact runtime
71 * performance while allowing the largest possible archives to be handled.
72 * Trying to force the fit to the posix database routines was not considered
73 * time well spent.
74 */
75
76 static HRDLNK **ltab = NULL; /* hard link table for detecting hard links */
77 static FTM **ftab = NULL; /* file time table for updating arch */
78 static NAMT **ntab = NULL; /* interactive rename storage table */
79 static DEVT **dtab = NULL; /* device/inode mapping tables */
80 static ATDIR **atab = NULL; /* file tree directory time reset table */
81 #ifdef DIRS_USE_FILE
82 static int dirfd = -1; /* storage for setting created dir time/mode */
83 static u_long dircnt; /* entries in dir time/mode storage */
84 #endif
85 static int ffd = -1; /* tmp file for file time table name storage */
86
87 static DEVT *chk_dev(dev_t, int);
88
89 /*
90 * hard link table routines
91 *
92 * The hard link table tries to detect hard links to files using the device and
93 * inode values. We do this when writing an archive, so we can tell the format
94 * write routine that this file is a hard link to another file. The format
95 * write routine then can store this file in whatever way it wants (as a hard
96 * link if the format supports that like tar, or ignore this info like cpio).
97 * (Actually a field in the format driver table tells us if the format wants
98 * hard link info. if not, we do not waste time looking for them). We also use
99 * the same table when reading an archive. In that situation, this table is
100 * used by the format read routine to detect hard links from stored dev and
101 * inode numbers (like cpio). This will allow pax to create a link when one
102 * can be detected by the archive format.
103 */
104
105 /*
106 * lnk_start
107 * Creates the hard link table.
108 * Return:
109 * 0 if created, -1 if failure
110 */
111
112 int
113 lnk_start(void)
114 {
115 if (ltab != NULL)
116 return(0);
117 if ((ltab = (HRDLNK **)calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) {
118 tty_warn(1, "Cannot allocate memory for hard link table");
119 return(-1);
120 }
121 return(0);
122 }
123
124 /*
125 * chk_lnk()
126 * Looks up entry in hard link hash table. If found, it copies the name
127 * of the file it is linked to (we already saw that file) into ln_name.
128 * lnkcnt is decremented and if goes to 1 the node is deleted from the
129 * database. (We have seen all the links to this file). If not found,
130 * we add the file to the database if it has the potential for having
131 * hard links to other files we may process (it has a link count > 1)
132 * Return:
133 * if found returns 1; if not found returns 0; -1 on error
134 */
135
136 int
137 chk_lnk(ARCHD *arcn)
138 {
139 HRDLNK *pt;
140 HRDLNK **ppt;
141 u_int indx;
142
143 if (ltab == NULL)
144 return(-1);
145 /*
146 * ignore those nodes that cannot have hard links
147 */
148 if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1))
149 return(0);
150
151 /*
152 * hash inode number and look for this file
153 */
154 indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
155 if ((pt = ltab[indx]) != NULL) {
156 /*
157 * it's hash chain in not empty, walk down looking for it
158 */
159 ppt = &(ltab[indx]);
160 while (pt != NULL) {
161 if ((pt->ino == arcn->sb.st_ino) &&
162 (pt->dev == arcn->sb.st_dev))
163 break;
164 ppt = &(pt->fow);
165 pt = pt->fow;
166 }
167
168 if (pt != NULL) {
169 /*
170 * found a link. set the node type and copy in the
171 * name of the file it is to link to. we need to
172 * handle hardlinks to regular files differently than
173 * other links.
174 */
175 arcn->ln_nlen = strlcpy(arcn->ln_name, pt->name,
176 sizeof(arcn->ln_name));
177 if (arcn->type == PAX_REG)
178 arcn->type = PAX_HRG;
179 else
180 arcn->type = PAX_HLK;
181
182 /*
183 * if we have found all the links to this file, remove
184 * it from the database
185 */
186 if (--pt->nlink <= 1) {
187 *ppt = pt->fow;
188 (void)free((char *)pt->name);
189 (void)free((char *)pt);
190 }
191 return(1);
192 }
193 }
194
195 /*
196 * we never saw this file before. It has links so we add it to the
197 * front of this hash chain
198 */
199 if ((pt = (HRDLNK *)malloc(sizeof(HRDLNK))) != NULL) {
200 if ((pt->name = strdup(arcn->name)) != NULL) {
201 pt->dev = arcn->sb.st_dev;
202 pt->ino = arcn->sb.st_ino;
203 pt->nlink = arcn->sb.st_nlink;
204 pt->fow = ltab[indx];
205 ltab[indx] = pt;
206 return(0);
207 }
208 (void)free((char *)pt);
209 }
210
211 tty_warn(1, "Hard link table out of memory");
212 return(-1);
213 }
214
215 /*
216 * purg_lnk
217 * remove reference for a file that we may have added to the data base as
218 * a potential source for hard links. We ended up not using the file, so
219 * we do not want to accidently point another file at it later on.
220 */
221
222 void
223 purg_lnk(ARCHD *arcn)
224 {
225 HRDLNK *pt;
226 HRDLNK **ppt;
227 u_int indx;
228
229 if (ltab == NULL)
230 return;
231 /*
232 * do not bother to look if it could not be in the database
233 */
234 if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) ||
235 (arcn->type == PAX_HLK) || (arcn->type == PAX_HRG))
236 return;
237
238 /*
239 * find the hash chain for this inode value, if empty return
240 */
241 indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
242 if ((pt = ltab[indx]) == NULL)
243 return;
244
245 /*
246 * walk down the list looking for the inode/dev pair, unlink and
247 * free if found
248 */
249 ppt = &(ltab[indx]);
250 while (pt != NULL) {
251 if ((pt->ino == arcn->sb.st_ino) &&
252 (pt->dev == arcn->sb.st_dev))
253 break;
254 ppt = &(pt->fow);
255 pt = pt->fow;
256 }
257 if (pt == NULL)
258 return;
259
260 /*
261 * remove and free it
262 */
263 *ppt = pt->fow;
264 (void)free((char *)pt->name);
265 (void)free((char *)pt);
266 }
267
268 /*
269 * lnk_end()
270 * pull apart a existing link table so we can reuse it. We do this between
271 * read and write phases of append with update. (The format may have
272 * used the link table, and we need to start with a fresh table for the
273 * write phase
274 */
275
276 void
277 lnk_end(void)
278 {
279 int i;
280 HRDLNK *pt;
281 HRDLNK *ppt;
282
283 if (ltab == NULL)
284 return;
285
286 for (i = 0; i < L_TAB_SZ; ++i) {
287 if (ltab[i] == NULL)
288 continue;
289 pt = ltab[i];
290 ltab[i] = NULL;
291
292 /*
293 * free up each entry on this chain
294 */
295 while (pt != NULL) {
296 ppt = pt;
297 pt = ppt->fow;
298 (void)free((char *)ppt->name);
299 (void)free((char *)ppt);
300 }
301 }
302 return;
303 }
304
305 /*
306 * modification time table routines
307 *
308 * The modification time table keeps track of last modification times for all
309 * files stored in an archive during a write phase when -u is set. We only
310 * add a file to the archive if it is newer than a file with the same name
311 * already stored on the archive (if there is no other file with the same
312 * name on the archive it is added). This applies to writes and appends.
313 * An append with an -u must read the archive and store the modification time
314 * for every file on that archive before starting the write phase. It is clear
315 * that this is one HUGE database. To save memory space, the actual file names
316 * are stored in a scratch file and indexed by an in-memory hash table. The
317 * hash table is indexed by hashing the file path. The nodes in the table store
318 * the length of the filename and the lseek offset within the scratch file
319 * where the actual name is stored. Since there are never any deletions from this
320 * table, fragmentation of the scratch file is never a issue. Lookups seem to
321 * not exhibit any locality at all (files in the database are rarely
322 * looked up more than once...), so caching is just a waste of memory. The
323 * only limitation is the amount of scratch file space available to store the
324 * path names.
325 */
326
327 /*
328 * ftime_start()
329 * create the file time hash table and open for read/write the scratch
330 * file. (after created it is unlinked, so when we exit we leave
331 * no witnesses).
332 * Return:
333 * 0 if the table and file was created ok, -1 otherwise
334 */
335
336 int
337 ftime_start(void)
338 {
339 if (ftab != NULL)
340 return(0);
341 if ((ftab = (FTM **)calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) {
342 tty_warn(1, "Cannot allocate memory for file time table");
343 return(-1);
344 }
345
346 /*
347 * get random name and create temporary scratch file, unlink name
348 * so it will get removed on exit
349 */
350 memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));
351 if ((ffd = mkstemp(tempfile)) == -1) {
352 syswarn(1, errno, "Unable to create temporary file: %s",
353 tempfile);
354 return(-1);
355 }
356
357 (void)unlink(tempfile);
358 return(0);
359 }
360
361 /*
362 * chk_ftime()
363 * looks up entry in file time hash table. If not found, the file is
364 * added to the hash table and the file named stored in the scratch file.
365 * If a file with the same name is found, the file times are compared and
366 * the most recent file time is retained. If the new file was younger (or
367 * was not in the database) the new file is selected for storage.
368 * Return:
369 * 0 if file should be added to the archive, 1 if it should be skipped,
370 * -1 on error
371 */
372
373 int
374 chk_ftime(ARCHD *arcn)
375 {
376 FTM *pt;
377 int namelen;
378 u_int indx;
379 char ckname[PAXPATHLEN+1];
380
381 /*
382 * no info, go ahead and add to archive
383 */
384 if (ftab == NULL)
385 return(0);
386
387 /*
388 * hash the pathname and look up in table
389 */
390 namelen = arcn->nlen;
391 indx = st_hash(arcn->name, namelen, F_TAB_SZ);
392 if ((pt = ftab[indx]) != NULL) {
393 /*
394 * the hash chain is not empty, walk down looking for match
395 * only read up the path names if the lengths match, speeds
396 * up the search a lot
397 */
398 while (pt != NULL) {
399 if (pt->namelen == namelen) {
400 /*
401 * potential match, have to read the name
402 * from the scratch file.
403 */
404 if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) {
405 syswarn(1, errno,
406 "Failed ftime table seek");
407 return(-1);
408 }
409 if (xread(ffd, ckname, namelen) != namelen) {
410 syswarn(1, errno,
411 "Failed ftime table read");
412 return(-1);
413 }
414
415 /*
416 * if the names match, we are done
417 */
418 if (!strncmp(ckname, arcn->name, namelen))
419 break;
420 }
421
422 /*
423 * try the next entry on the chain
424 */
425 pt = pt->fow;
426 }
427
428 if (pt != NULL) {
429 /*
430 * found the file, compare the times, save the newer
431 */
432 if (arcn->sb.st_mtime > pt->mtime) {
433 /*
434 * file is newer
435 */
436 pt->mtime = arcn->sb.st_mtime;
437 return(0);
438 }
439 /*
440 * file is older
441 */
442 return(1);
443 }
444 }
445
446 /*
447 * not in table, add it
448 */
449 if ((pt = (FTM *)malloc(sizeof(FTM))) != NULL) {
450 /*
451 * add the name at the end of the scratch file, saving the
452 * offset. add the file to the head of the hash chain
453 */
454 if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) {
455 if (xwrite(ffd, arcn->name, namelen) == namelen) {
456 pt->mtime = arcn->sb.st_mtime;
457 pt->namelen = namelen;
458 pt->fow = ftab[indx];
459 ftab[indx] = pt;
460 return(0);
461 }
462 syswarn(1, errno, "Failed write to file time table");
463 } else
464 syswarn(1, errno, "Failed seek on file time table");
465 } else
466 tty_warn(1, "File time table ran out of memory");
467
468 if (pt != NULL)
469 (void)free((char *)pt);
470 return(-1);
471 }
472
473 /*
474 * Interactive rename table routines
475 *
476 * The interactive rename table keeps track of the new names that the user
477 * assigns to files from tty input. Since this map is unique for each file
478 * we must store it in case there is a reference to the file later in archive
479 * (a link). Otherwise we will be unable to find the file we know was
480 * extracted. The remapping of these files is stored in a memory based hash
481 * table (it is assumed since input must come from /dev/tty, it is unlikely to
482 * be a very large table).
483 */
484
485 /*
486 * name_start()
487 * create the interactive rename table
488 * Return:
489 * 0 if successful, -1 otherwise
490 */
491
492 int
493 name_start(void)
494 {
495 if (ntab != NULL)
496 return(0);
497 if ((ntab = (NAMT **)calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) {
498 tty_warn(1,
499 "Cannot allocate memory for interactive rename table");
500 return(-1);
501 }
502 return(0);
503 }
504
505 /*
506 * add_name()
507 * add the new name to old name mapping just created by the user.
508 * If an old name mapping is found (there may be duplicate names on an
509 * archive) only the most recent is kept.
510 * Return:
511 * 0 if added, -1 otherwise
512 */
513
514 int
515 add_name(char *oname, int onamelen, char *nname)
516 {
517 NAMT *pt;
518 u_int indx;
519
520 if (ntab == NULL) {
521 /*
522 * should never happen
523 */
524 tty_warn(0, "No interactive rename table, links may fail\n");
525 return(0);
526 }
527
528 /*
529 * look to see if we have already mapped this file, if so we
530 * will update it
531 */
532 indx = st_hash(oname, onamelen, N_TAB_SZ);
533 if ((pt = ntab[indx]) != NULL) {
534 /*
535 * look down the has chain for the file
536 */
537 while ((pt != NULL) && (strcmp(oname, pt->oname) != 0))
538 pt = pt->fow;
539
540 if (pt != NULL) {
541 /*
542 * found an old mapping, replace it with the new one
543 * the user just input (if it is different)
544 */
545 if (strcmp(nname, pt->nname) == 0)
546 return(0);
547
548 (void)free((char *)pt->nname);
549 if ((pt->nname = strdup(nname)) == NULL) {
550 tty_warn(1, "Cannot update rename table");
551 return(-1);
552 }
553 return(0);
554 }
555 }
556
557 /*
558 * this is a new mapping, add it to the table
559 */
560 if ((pt = (NAMT *)malloc(sizeof(NAMT))) != NULL) {
561 if ((pt->oname = strdup(oname)) != NULL) {
562 if ((pt->nname = strdup(nname)) != NULL) {
563 pt->fow = ntab[indx];
564 ntab[indx] = pt;
565 return(0);
566 }
567 (void)free((char *)pt->oname);
568 }
569 (void)free((char *)pt);
570 }
571 tty_warn(1, "Interactive rename table out of memory");
572 return(-1);
573 }
574
575 /*
576 * sub_name()
577 * look up a link name to see if it points at a file that has been
578 * remapped by the user. If found, the link is adjusted to contain the
579 * new name (oname is the link to name)
580 */
581
582 void
583 sub_name(char *oname, int *onamelen, size_t onamesize)
584 {
585 NAMT *pt;
586 u_int indx;
587
588 if (ntab == NULL)
589 return;
590 /*
591 * look the name up in the hash table
592 */
593 indx = st_hash(oname, *onamelen, N_TAB_SZ);
594 if ((pt = ntab[indx]) == NULL)
595 return;
596
597 while (pt != NULL) {
598 /*
599 * walk down the hash chain looking for a match
600 */
601 if (strcmp(oname, pt->oname) == 0) {
602 /*
603 * found it, replace it with the new name
604 * and return (we know that oname has enough space)
605 */
606 *onamelen = strlcpy(oname, pt->nname, onamesize);
607 return;
608 }
609 pt = pt->fow;
610 }
611
612 /*
613 * no match, just return
614 */
615 return;
616 }
617
618 /*
619 * device/inode mapping table routines
620 * (used with formats that store device and inodes fields)
621 *
622 * device/inode mapping tables remap the device field in a archive header. The
623 * device/inode fields are used to determine when files are hard links to each
624 * other. However these values have very little meaning outside of that. This
625 * database is used to solve one of two different problems.
626 *
627 * 1) when files are appended to an archive, while the new files may have hard
628 * links to each other, you cannot determine if they have hard links to any
629 * file already stored on the archive from a prior run of pax. We must assume
630 * that these inode/device pairs are unique only within a SINGLE run of pax
631 * (which adds a set of files to an archive). So we have to make sure the
632 * inode/dev pairs we add each time are always unique. We do this by observing
633 * while the inode field is very dense, the use of the dev field is fairly
634 * sparse. Within each run of pax, we remap any device number of a new archive
635 * member that has a device number used in a prior run and already stored in a
636 * file on the archive. During the read phase of the append, we store the
637 * device numbers used and mark them to not be used by any file during the
638 * write phase. If during write we go to use one of those old device numbers,
639 * we remap it to a new value.
640 *
641 * 2) Often the fields in the archive header used to store these values are
642 * too small to store the entire value. The result is an inode or device value
643 * which can be truncated. This really can foul up an archive. With truncation
644 * we end up creating links between files that are really not links (after
645 * truncation the inodes are the same value). We address that by detecting
646 * truncation and forcing a remap of the device field to split truncated
647 * inodes away from each other. Each truncation creates a pattern of bits that
648 * are removed. We use this pattern of truncated bits to partition the inodes
649 * on a single device to many different devices (each one represented by the
650 * truncated bit pattern). All inodes on the same device that have the same
651 * truncation pattern are mapped to the same new device. Two inodes that
652 * truncate to the same value clearly will always have different truncation
653 * bit patterns, so they will be split from away each other. When we spot
654 * device truncation we remap the device number to a non truncated value.
655 * (for more info see table.h for the data structures involved).
656 */
657
658 /*
659 * dev_start()
660 * create the device mapping table
661 * Return:
662 * 0 if successful, -1 otherwise
663 */
664
665 int
666 dev_start(void)
667 {
668 if (dtab != NULL)
669 return(0);
670 if ((dtab = (DEVT **)calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) {
671 tty_warn(1, "Cannot allocate memory for device mapping table");
672 return(-1);
673 }
674 return(0);
675 }
676
677 /*
678 * add_dev()
679 * add a device number to the table. this will force the device to be
680 * remapped to a new value if it be used during a write phase. This
681 * function is called during the read phase of an append to prohibit the
682 * use of any device number already in the archive.
683 * Return:
684 * 0 if added ok, -1 otherwise
685 */
686
687 int
688 add_dev(ARCHD *arcn)
689 {
690 if (chk_dev(arcn->sb.st_dev, 1) == NULL)
691 return(-1);
692 return(0);
693 }
694
695 /*
696 * chk_dev()
697 * check for a device value in the device table. If not found and the add
698 * flag is set, it is added. This does NOT assign any mapping values, just
699 * adds the device number as one that need to be remapped. If this device
700 * is already mapped, just return with a pointer to that entry.
701 * Return:
702 * pointer to the entry for this device in the device map table. Null
703 * if the add flag is not set and the device is not in the table (it is
704 * not been seen yet). If add is set and the device cannot be added, null
705 * is returned (indicates an error).
706 */
707
708 static DEVT *
709 chk_dev(dev_t dev, int add)
710 {
711 DEVT *pt;
712 u_int indx;
713
714 if (dtab == NULL)
715 return(NULL);
716 /*
717 * look to see if this device is already in the table
718 */
719 indx = ((unsigned)dev) % D_TAB_SZ;
720 if ((pt = dtab[indx]) != NULL) {
721 while ((pt != NULL) && (pt->dev != dev))
722 pt = pt->fow;
723
724 /*
725 * found it, return a pointer to it
726 */
727 if (pt != NULL)
728 return(pt);
729 }
730
731 /*
732 * not in table, we add it only if told to as this may just be a check
733 * to see if a device number is being used.
734 */
735 if (add == 0)
736 return(NULL);
737
738 /*
739 * allocate a node for this device and add it to the front of the hash
740 * chain. Note we do not assign remaps values here, so the pt->list
741 * list must be NULL.
742 */
743 if ((pt = (DEVT *)malloc(sizeof(DEVT))) == NULL) {
744 tty_warn(1, "Device map table out of memory");
745 return(NULL);
746 }
747 pt->dev = dev;
748 pt->list = NULL;
749 pt->fow = dtab[indx];
750 dtab[indx] = pt;
751 return(pt);
752 }
753 /*
754 * map_dev()
755 * given an inode and device storage mask (the mask has a 1 for each bit
756 * the archive format is able to store in a header), we check for inode
757 * and device truncation and remap the device as required. Device mapping
758 * can also occur when during the read phase of append a device number was
759 * seen (and was marked as do not use during the write phase). WE ASSUME
760 * that unsigned longs are the same size or bigger than the fields used
761 * for ino_t and dev_t. If not the types will have to be changed.
762 * Return:
763 * 0 if all ok, -1 otherwise.
764 */
765
766 int
767 map_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask)
768 {
769 DEVT *pt;
770 DLIST *dpt;
771 static dev_t lastdev = 0; /* next device number to try */
772 int trc_ino = 0;
773 int trc_dev = 0;
774 ino_t trunc_bits = 0;
775 ino_t nino;
776
777 if (dtab == NULL)
778 return(0);
779 /*
780 * check for device and inode truncation, and extract the truncated
781 * bit pattern.
782 */
783 if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev)
784 ++trc_dev;
785 if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) {
786 ++trc_ino;
787 trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask);
788 }
789
790 /*
791 * see if this device is already being mapped, look up the device
792 * then find the truncation bit pattern which applies
793 */
794 if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) {
795 /*
796 * this device is already marked to be remapped
797 */
798 for (dpt = pt->list; dpt != NULL; dpt = dpt->fow)
799 if (dpt->trunc_bits == trunc_bits)
800 break;
801
802 if (dpt != NULL) {
803 /*
804 * we are being remapped for this device and pattern
805 * change the device number to be stored and return
806 */
807 arcn->sb.st_dev = dpt->dev;
808 arcn->sb.st_ino = nino;
809 return(0);
810 }
811 } else {
812 /*
813 * this device is not being remapped YET. if we do not have any
814 * form of truncation, we do not need a remap
815 */
816 if (!trc_ino && !trc_dev)
817 return(0);
818
819 /*
820 * we have truncation, have to add this as a device to remap
821 */
822 if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL)
823 goto bad;
824
825 /*
826 * if we just have a truncated inode, we have to make sure that
827 * all future inodes that do not truncate (they have the
828 * truncation pattern of all 0's) continue to map to the same
829 * device number. We probably have already written inodes with
830 * this device number to the archive with the truncation
831 * pattern of all 0's. So we add the mapping for all 0's to the
832 * same device number.
833 */
834 if (!trc_dev && (trunc_bits != 0)) {
835 if ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL)
836 goto bad;
837 dpt->trunc_bits = 0;
838 dpt->dev = arcn->sb.st_dev;
839 dpt->fow = pt->list;
840 pt->list = dpt;
841 }
842 }
843
844 /*
845 * look for a device number not being used. We must watch for wrap
846 * around on lastdev (so we do not get stuck looking forever!)
847 */
848 while (++lastdev > 0) {
849 if (chk_dev(lastdev, 0) != NULL)
850 continue;
851 /*
852 * found an unused value. If we have reached truncation point
853 * for this format we are hosed, so we give up. Otherwise we
854 * mark it as being used.
855 */
856 if (((lastdev & ((dev_t)dev_mask)) != lastdev) ||
857 (chk_dev(lastdev, 1) == NULL))
858 goto bad;
859 break;
860 }
861
862 if ((lastdev <= 0) || ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL))
863 goto bad;
864
865 /*
866 * got a new device number, store it under this truncation pattern.
867 * change the device number this file is being stored with.
868 */
869 dpt->trunc_bits = trunc_bits;
870 dpt->dev = lastdev;
871 dpt->fow = pt->list;
872 pt->list = dpt;
873 arcn->sb.st_dev = lastdev;
874 arcn->sb.st_ino = nino;
875 return(0);
876
877 bad:
878 tty_warn(1,
879 "Unable to fix truncated inode/device field when storing %s",
880 arcn->name);
881 tty_warn(0, "Archive may create improper hard links when extracted");
882 return(0);
883 }
884
885 /*
886 * directory access/mod time reset table routines (for directories READ by pax)
887 *
888 * The pax -t flag requires that access times of archive files to be the same
889 * as before being read by pax. For regular files, access time is restored after
890 * the file has been copied. This database provides the same functionality for
891 * directories read during file tree traversal. Restoring directory access time
892 * is more complex than files since directories may be read several times until
893 * all the descendants in their subtree are visited by fts. Directory access
894 * and modification times are stored during the fts pre-order visit (done
895 * before any descendants in the subtree is visited) and restored after the
896 * fts post-order visit (after all the descendants have been visited). In the
897 * case of premature exit from a subtree (like from the effects of -n), any
898 * directory entries left in this database are reset during final cleanup
899 * operations of pax. Entries are hashed by inode number for fast lookup.
900 */
901
902 /*
903 * atdir_start()
904 * create the directory access time database for directories READ by pax.
905 * Return:
906 * 0 is created ok, -1 otherwise.
907 */
908
909 int
910 atdir_start(void)
911 {
912 if (atab != NULL)
913 return(0);
914 if ((atab = (ATDIR **)calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) {
915 tty_warn(1,
916 "Cannot allocate space for directory access time table");
917 return(-1);
918 }
919 return(0);
920 }
921
922
923 /*
924 * atdir_end()
925 * walk through the directory access time table and reset the access time
926 * of any directory who still has an entry left in the database. These
927 * entries are for directories READ by pax
928 */
929
930 void
931 atdir_end(void)
932 {
933 ATDIR *pt;
934 int i;
935
936 if (atab == NULL)
937 return;
938 /*
939 * for each non-empty hash table entry reset all the directories
940 * chained there.
941 */
942 for (i = 0; i < A_TAB_SZ; ++i) {
943 if ((pt = atab[i]) == NULL)
944 continue;
945 /*
946 * remember to force the times, set_ftime() looks at pmtime
947 * and patime, which only applies to things CREATED by pax,
948 * not read by pax. Read time reset is controlled by -t.
949 */
950 for (; pt != NULL; pt = pt->fow)
951 set_ftime(pt->name, pt->mtime, pt->atime, 1);
952 }
953 }
954
955 /*
956 * add_atdir()
957 * add a directory to the directory access time table. Table is hashed
958 * and chained by inode number. This is for directories READ by pax
959 */
960
961 void
962 add_atdir(char *fname, dev_t dev, ino_t ino, time_t mtime, time_t atime)
963 {
964 ATDIR *pt;
965 u_int indx;
966
967 if (atab == NULL)
968 return;
969
970 /*
971 * make sure this directory is not already in the table, if so just
972 * return (the older entry always has the correct time). The only
973 * way this will happen is when the same subtree can be traversed by
974 * different args to pax and the -n option is aborting fts out of a
975 * subtree before all the post-order visits have been made.
976 */
977 indx = ((unsigned)ino) % A_TAB_SZ;
978 if ((pt = atab[indx]) != NULL) {
979 while (pt != NULL) {
980 if ((pt->ino == ino) && (pt->dev == dev))
981 break;
982 pt = pt->fow;
983 }
984
985 /*
986 * oops, already there. Leave it alone.
987 */
988 if (pt != NULL)
989 return;
990 }
991
992 /*
993 * add it to the front of the hash chain
994 */
995 if ((pt = (ATDIR *)malloc(sizeof(ATDIR))) != NULL) {
996 if ((pt->name = strdup(fname)) != NULL) {
997 pt->dev = dev;
998 pt->ino = ino;
999 pt->mtime = mtime;
1000 pt->atime = atime;
1001 pt->fow = atab[indx];
1002 atab[indx] = pt;
1003 return;
1004 }
1005 (void)free((char *)pt);
1006 }
1007
1008 tty_warn(1, "Directory access time reset table ran out of memory");
1009 return;
1010 }
1011
1012 /*
1013 * get_atdir()
1014 * look up a directory by inode and device number to obtain the access
1015 * and modification time you want to set to. If found, the modification
1016 * and access time parameters are set and the entry is removed from the
1017 * table (as it is no longer needed). These are for directories READ by
1018 * pax
1019 * Return:
1020 * 0 if found, -1 if not found.
1021 */
1022
1023 int
1024 get_atdir(dev_t dev, ino_t ino, time_t *mtime, time_t *atime)
1025 {
1026 ATDIR *pt;
1027 ATDIR **ppt;
1028 u_int indx;
1029
1030 if (atab == NULL)
1031 return(-1);
1032 /*
1033 * hash by inode and search the chain for an inode and device match
1034 */
1035 indx = ((unsigned)ino) % A_TAB_SZ;
1036 if ((pt = atab[indx]) == NULL)
1037 return(-1);
1038
1039 ppt = &(atab[indx]);
1040 while (pt != NULL) {
1041 if ((pt->ino == ino) && (pt->dev == dev))
1042 break;
1043 /*
1044 * no match, go to next one
1045 */
1046 ppt = &(pt->fow);
1047 pt = pt->fow;
1048 }
1049
1050 /*
1051 * return if we did not find it.
1052 */
1053 if (pt == NULL)
1054 return(-1);
1055
1056 /*
1057 * found it. return the times and remove the entry from the table.
1058 */
1059 *ppt = pt->fow;
1060 *mtime = pt->mtime;
1061 *atime = pt->atime;
1062 (void)free((char *)pt->name);
1063 (void)free((char *)pt);
1064 return(0);
1065 }
1066
1067 /*
1068 * directory access mode and time storage routines (for directories CREATED
1069 * by pax).
1070 *
1071 * Pax requires that extracted directories, by default, have their access/mod
1072 * times and permissions set to the values specified in the archive. During the
1073 * actions of extracting (and creating the destination subtree during -rw copy)
1074 * directories extracted may be modified after being created. Even worse is
1075 * that these directories may have been created with file permissions which
1076 * prohibits any descendants of these directories from being extracted. When
1077 * directories are created by pax, access rights may be added to permit the
1078 * creation of files in their subtree. Every time pax creates a directory, the
1079 * times and file permissions specified by the archive are stored. After all
1080 * files have been extracted (or copied), these directories have their times
1081 * and file modes reset to the stored values. The directory info is restored in
1082 * reverse order as entries were added to the data file from root to leaf. To
1083 * restore atime properly, we must go backwards. The data file consists of
1084 * records with two parts, the file name followed by a DIRDATA trailer. The
1085 * fixed sized trailer contains the size of the name plus the off_t location in
1086 * the file. To restore we work backwards through the file reading the trailer
1087 * then the file name.
1088 */
1089
1090 #ifndef DIRS_USE_FILE
1091 static DIRDATA *dirdata_head;
1092 #endif
1093
1094 /*
1095 * dir_start()
1096 * set up the directory time and file mode storage for directories CREATED
1097 * by pax.
1098 * Return:
1099 * 0 if ok, -1 otherwise
1100 */
1101
1102 int
1103 dir_start(void)
1104 {
1105 #ifdef DIRS_USE_FILE
1106 if (dirfd != -1)
1107 return(0);
1108
1109 /*
1110 * unlink the file so it goes away at termination by itself
1111 */
1112 memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));
1113 if ((dirfd = mkstemp(tempfile)) >= 0) {
1114 (void)unlink(tempfile);
1115 return(0);
1116 }
1117 tty_warn(1, "Unable to create temporary file for directory times: %s",
1118 tempfile);
1119 return(-1);
1120 #else
1121 return (0);
1122 #endif /* DIRS_USE_FILE */
1123 }
1124
1125 /*
1126 * add_dir()
1127 * add the mode and times for a newly CREATED directory
1128 * name is name of the directory, psb the stat buffer with the data in it,
1129 * frc_mode is a flag that says whether to force the setting of the mode
1130 * (ignoring the user set values for preserving file mode). Frc_mode is
1131 * for the case where we created a file and found that the resulting
1132 * directory was not writable and the user asked for file modes to NOT
1133 * be preserved. (we have to preserve what was created by default, so we
1134 * have to force the setting at the end. this is stated explicitly in the
1135 * pax spec)
1136 */
1137
1138 void
1139 add_dir(char *name, int nlen, struct stat *psb, int frc_mode)
1140 {
1141 #ifdef DIRS_USE_FILE
1142 DIRDATA dblk;
1143
1144 if (dirfd < 0)
1145 return;
1146
1147 /*
1148 * get current position (where file name will start) so we can store it
1149 * in the trailer
1150 */
1151 if ((dblk.npos = lseek(dirfd, 0L, SEEK_CUR)) < 0) {
1152 tty_warn(1,
1153 "Unable to store mode and times for directory: %s",name);
1154 return;
1155 }
1156
1157 /*
1158 * write the file name followed by the trailer
1159 */
1160 dblk.nlen = nlen + 1;
1161 dblk.mode = psb->st_mode & 0xffff;
1162 dblk.mtime = psb->st_mtime;
1163 dblk.atime = psb->st_atime;
1164 #if HAVE_STRUCT_STAT_ST_FLAGS
1165 dblk.fflags = psb->st_flags;
1166 #else
1167 dblk.fflags = 0;
1168 #endif
1169 dblk.frc_mode = frc_mode;
1170 if ((xwrite(dirfd, name, dblk.nlen) == dblk.nlen) &&
1171 (xwrite(dirfd, (char *)&dblk, sizeof(dblk)) == sizeof(dblk))) {
1172 ++dircnt;
1173 return;
1174 }
1175
1176 tty_warn(1,
1177 "Unable to store mode and times for created directory: %s",name);
1178 return;
1179 #else
1180 DIRDATA *dblk;
1181
1182 if ((dblk = malloc(sizeof(*dblk))) == NULL ||
1183 (dblk->name = strdup(name)) == NULL) {
1184 tty_warn(1,
1185 "Unable to store mode and times for directory: %s",name);
1186 if (dblk != NULL)
1187 free(dblk);
1188 return;
1189 }
1190
1191 dblk->mode = psb->st_mode & 0xffff;
1192 dblk->mtime = psb->st_mtime;
1193 dblk->atime = psb->st_atime;
1194 #if HAVE_STRUCT_STAT_ST_FLAGS
1195 dblk->fflags = psb->st_flags;
1196 #else
1197 dblk->fflags = 0;
1198 #endif
1199 dblk->frc_mode = frc_mode;
1200
1201 dblk->next = dirdata_head;
1202 dirdata_head = dblk;
1203 return;
1204 #endif /* DIRS_USE_FILE */
1205 }
1206
1207 /*
1208 * proc_dir()
1209 * process all file modes and times stored for directories CREATED
1210 * by pax
1211 */
1212
1213 void
1214 proc_dir(void)
1215 {
1216 #ifdef DIRS_USE_FILE
1217 char name[PAXPATHLEN+1];
1218 DIRDATA dblk;
1219 u_long cnt;
1220
1221 if (dirfd < 0)
1222 return;
1223 /*
1224 * read backwards through the file and process each directory
1225 */
1226 for (cnt = 0; cnt < dircnt; ++cnt) {
1227 /*
1228 * read the trailer, then the file name, if this fails
1229 * just give up.
1230 */
1231 if (lseek(dirfd, -((off_t)sizeof(dblk)), SEEK_CUR) < 0)
1232 break;
1233 if (xread(dirfd,(char *)&dblk, sizeof(dblk)) != sizeof(dblk))
1234 break;
1235 if (lseek(dirfd, dblk.npos, SEEK_SET) < 0)
1236 break;
1237 if (xread(dirfd, name, dblk.nlen) != dblk.nlen)
1238 break;
1239 if (lseek(dirfd, dblk.npos, SEEK_SET) < 0)
1240 break;
1241
1242 /*
1243 * frc_mode set, make sure we set the file modes even if
1244 * the user didn't ask for it (see file_subs.c for more info)
1245 */
1246 if (pmode || dblk.frc_mode)
1247 set_pmode(name, dblk.mode);
1248 if (patime || pmtime)
1249 set_ftime(name, dblk.mtime, dblk.atime, 0);
1250 if (pfflags)
1251 set_chflags(name, dblk.fflags);
1252 }
1253
1254 (void)close(dirfd);
1255 dirfd = -1;
1256 if (cnt != dircnt)
1257 tty_warn(1,
1258 "Unable to set mode and times for created directories");
1259 return;
1260 #else
1261 DIRDATA *dblk;
1262
1263 for (dblk = dirdata_head; dblk != NULL; dblk = dirdata_head) {
1264 dirdata_head = dblk->next;
1265
1266 /*
1267 * frc_mode set, make sure we set the file modes even if
1268 * the user didn't ask for it (see file_subs.c for more info)
1269 */
1270 if (pmode || dblk->frc_mode)
1271 set_pmode(dblk->name, dblk->mode);
1272 if (patime || pmtime)
1273 set_ftime(dblk->name, dblk->mtime, dblk->atime, 0);
1274 if (pfflags)
1275 set_chflags(dblk->name, dblk->fflags);
1276
1277 free(dblk->name);
1278 free(dblk);
1279 }
1280 #endif /* DIRS_USE_FILE */
1281 }
1282
1283 /*
1284 * database independent routines
1285 */
1286
1287 /*
1288 * st_hash()
1289 * hashes filenames to a u_int for hashing into a table. Looks at the tail
1290 * end of file, as this provides far better distribution than any other
1291 * part of the name. For performance reasons we only care about the last
1292 * MAXKEYLEN chars (should be at LEAST large enough to pick off the file
1293 * name). Was tested on 500,000 name file tree traversal from the root
1294 * and gave almost a perfectly uniform distribution of keys when used with
1295 * prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int)
1296 * chars at a time and pads with 0 for last addition.
1297 * Return:
1298 * the hash value of the string MOD (%) the table size.
1299 */
1300
1301 u_int
1302 st_hash(char *name, int len, int tabsz)
1303 {
1304 char *pt;
1305 char *dest;
1306 char *end;
1307 int i;
1308 u_int key = 0;
1309 int steps;
1310 int res;
1311 u_int val;
1312
1313 /*
1314 * only look at the tail up to MAXKEYLEN, we do not need to waste
1315 * time here (remember these are pathnames, the tail is what will
1316 * spread out the keys)
1317 */
1318 if (len > MAXKEYLEN) {
1319 pt = &(name[len - MAXKEYLEN]);
1320 len = MAXKEYLEN;
1321 } else
1322 pt = name;
1323
1324 /*
1325 * calculate the number of u_int size steps in the string and if
1326 * there is a runt to deal with
1327 */
1328 steps = len/sizeof(u_int);
1329 res = len % sizeof(u_int);
1330
1331 /*
1332 * add up the value of the string in unsigned integer sized pieces
1333 * too bad we cannot have unsigned int aligned strings, then we
1334 * could avoid the expensive copy.
1335 */
1336 for (i = 0; i < steps; ++i) {
1337 end = pt + sizeof(u_int);
1338 dest = (char *)&val;
1339 while (pt < end)
1340 *dest++ = *pt++;
1341 key += val;
1342 }
1343
1344 /*
1345 * add in the runt padded with zero to the right
1346 */
1347 if (res) {
1348 val = 0;
1349 end = pt + res;
1350 dest = (char *)&val;
1351 while (pt < end)
1352 *dest++ = *pt++;
1353 key += val;
1354 }
1355
1356 /*
1357 * return the result mod the table size
1358 */
1359 return(key % tabsz);
1360 }
1361