join.awk revision 1.6.16.1 1 # $NetBSD: join.awk,v 1.6.16.1 2020/04/13 07:45:31 martin Exp $
2 #
3 # Copyright (c) 2002 The NetBSD Foundation, Inc.
4 # All rights reserved.
5 #
6 # This code is derived from software contributed to The NetBSD Foundation
7 # by Luke Mewburn of Wasabi Systems.
8 #
9 # Redistribution and use in source and binary forms, with or without
10 # modification, are permitted provided that the following conditions
11 # are met:
12 # 1. Redistributions of source code must retain the above copyright
13 # notice, this list of conditions and the following disclaimer.
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 # POSSIBILITY OF SUCH DAMAGE.
29 #
30 # join.awk F1 F2
31 # Similar to join(1), this reads a list of words from F1
32 # and outputs lines in F2 with a first word that is in F1.
33 # For purposes of matching the first word, both instances are
34 # canonicalised via unvis(word); the version from F2 is printed.
35 # Neither file needs to be sorted.
36
# unvis(s)
#	Return s with a subset of vis(3)-style escapes decoded:
#	  - two consecutive backslashes become a single backslash;
#	  - a backslash followed by exactly three octal digits becomes
#	    the character with that octal value;
#	  - any other backslash followed by a character is reported on
#	    /dev/stderr, and the backslash itself is kept literally.
#	A backslash that is the very last character of s is copied
#	unchanged without a warning.
#
#	result and pos are function-local scratch variables (extra
#	parameters are awk's way of declaring locals); callers pass only s.
#
function unvis(s,	result, pos) \
{
	# XXX: We don't handle the complete range of vis encodings
	result = ""
	while (length(s) > 0) {
		# locate the next backslash that is followed by another char
		pos = match(s, "\\\\.")
		if (pos == 0) {
			# nothing left to decode: copy the remainder verbatim
			result = result "" s
			s = ""
			break
		}
		# copy the part before the next backslash
		result = result "" substr(s, 1, pos - 1)
		s = substr(s, pos)
		# process the backslash and next few chars
		if (substr(s, 1, 2) == "\\\\") {
			# double backslash -> single backslash
			result = result "\\"
			s = substr(s, 3)
		} else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
			# \ooo with three octal digits.
			# XXX: a string-to-number builtin (e.g. gawk's
			# strtonum()) could be used if that is available
			result = result "" sprintf("%c", \
				0+substr(s, 2, 1) * 64 + \
				0+substr(s, 3, 1) * 8 + \
				0+substr(s, 4, 1))
			s = substr(s, 5)
		} else {
			# unrecognised escape: keep the literal backslash
			# and let the following character be copied as
			# ordinary text on the next iteration
			printf "%s: %s:%s: unrecognised escape %s\n", \
				ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
				substr(s, 1, 2) \
				>"/dev/stderr"
			result = result "" substr(s, 1, 1)
			s = substr(s, 2)
		}
	}
	return result
}
76
# Startup: validate the command line, then read every line of F1
# (ARGV[1]) into words[], keyed by the unvis()-decoded first field and
# holding the complete F1 line.  ARGV[1] is then deleted so that awk's
# main input loop only processes F2.
BEGIN \
{
	if (ARGC != 3) {
		printf("Usage: join file1 file2\n") >"/dev/stderr"
		exit 1
	}
	# getline returns 1 per line read, 0 at end of file and -1 on
	# error; keep the result so a read failure is not mistaken for
	# an empty F1.
	while ((f1_status = (getline < ARGV[1])) > 0) {
		f1 = unvis($1)
		words[f1] = $0
	}
	if (f1_status < 0) {
		printf("%s: cannot read %s\n", ARGV[0], ARGV[1]) >"/dev/stderr"
		exit 1
	}
	# F1 has been fully read; release it
	close(ARGV[1])
	delete ARGV[1]
}
89
# For every input record (the lines of F2): decode the first field and,
# when that decoded word was recorded from F1, print the stored F1 line
# immediately followed by this record with its first field blanked.
# Assigning to $1 makes awk rebuild $0 from the fields joined by OFS, so
# the rebuilt record starts with a separator.
{
	f1 = unvis($1)
	if (f1 in words) {
		$1 = ""
		print words[f1] $0
	}
}
97