join.awk revision 1.4 1 1.4 apb # $NetBSD: join.awk,v 1.4 2014/10/21 23:15:38 apb Exp $
2 1.1 lukem #
3 1.1 lukem # Copyright (c) 2002 The NetBSD Foundation, Inc.
4 1.1 lukem # All rights reserved.
5 1.1 lukem #
6 1.1 lukem # This code is derived from software contributed to The NetBSD Foundation
7 1.1 lukem # by Luke Mewburn of Wasabi Systems.
8 1.1 lukem #
9 1.1 lukem # Redistribution and use in source and binary forms, with or without
10 1.1 lukem # modification, are permitted provided that the following conditions
11 1.1 lukem # are met:
12 1.1 lukem # 1. Redistributions of source code must retain the above copyright
13 1.1 lukem # notice, this list of conditions and the following disclaimer.
14 1.1 lukem # 2. Redistributions in binary form must reproduce the above copyright
15 1.1 lukem # notice, this list of conditions and the following disclaimer in the
16 1.1 lukem # documentation and/or other materials provided with the distribution.
17 1.1 lukem #
18 1.1 lukem # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 1.1 lukem # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 1.1 lukem # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 1.1 lukem # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 1.1 lukem # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 1.1 lukem # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 1.1 lukem # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 1.1 lukem # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 1.1 lukem # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 1.1 lukem # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 1.1 lukem # POSSIBILITY OF SUCH DAMAGE.
29 1.1 lukem #
30 1.1 lukem # join.awk F1 F2
31 1.1 lukem # Similar to join(1), this reads a list of words from F1
32 1.1 lukem # and outputs lines in F2 with a first word that is in F1.
33 1.1 lukem # Neither file needs to be sorted
34 1.1 lukem
# unvis(s)
#	Return s with a subset of vis(3) escapes decoded:
#	    \\    -> a single backslash
#	    \ooo  -> the character whose code is octal ooo (exactly 3 digits)
#	Any other backslash sequence draws a warning on stderr and the
#	backslash itself is copied through unchanged.  A lone backslash at
#	the very end of s is copied through silently.
#
#	result and pos are function-local scratch variables (declared with
#	the awk extra-parameter idiom); callers pass only s.
#
#	Note: the warning reports FILENAME and FNR, which describe awk's
#	main input.  They are not updated by "getline <file", so they do not
#	locate escapes found in data that was read that way.
function unvis(s,    result, pos) \
{
	# XXX: We don't handle the complete range of vis encodings
	result = ""
	while (length(s) > 0) {
		# find the next backslash that is followed by another char
		pos = match(s, "\\\\.")
		if (pos == 0) {
			# nothing left to decode: keep the remainder verbatim
			result = result "" s
			s = ""
			break
		}
		# copy the part before the next backslash
		result = result "" substr(s, 1, pos - 1)
		s = substr(s, pos)
		# process the backslash and next few chars
		if (substr(s, 1, 2) == "\\\\") {
			# double backslash -> single backslash
			result = result "\\"
			s = substr(s, 3)
		} else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
			# \ooo with three octal digits.
			# XXX: use strtonum() if that is available
			# NOTE(review): "%c" with values >= 128 may produce a
			# multibyte character in some awks/locales -- confirm
			# if non-ASCII names matter.
			result = result "" sprintf("%c", \
				0+substr(s, 2, 1) * 64 + \
				0+substr(s, 3, 1) * 8 + \
				0+substr(s, 4, 1))
			s = substr(s, 5)
		} else {
			# unrecognised escape: keep the literal backslash
			printf "%s: %s:%s: unrecognised escape %s\n", \
				ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
				substr(s, 1, 2) \
				>"/dev/stderr"
			result = result "" substr(s, 1, 1)
			s = substr(s, 2)
		}
	}
	return result
}
74 1.4 apb
# Load the key list from file1 (ARGV[1]) before any main input is read.
# The first word of each line is unvis-decoded and used as the index into
# words[]; the stored value is the line as rebuilt after that assignment.
# file1 is then removed from ARGV so that only file2 is processed by the
# main rules.  Exits with status 1 on a usage error or if file1 cannot
# be read.
BEGIN \
{
	if (ARGC != 3) {
		printf("Usage: join file1 file2\n") >"/dev/stderr"
		exit 1
	}
	join_file1 = ARGV[1]
	while ( (join_status = (getline < join_file1)) > 0) {
		$1 = unvis($1)
		words[$1] = $0
	}
	if (join_status < 0) {
		# getline yields -1 on an open/read error; without this check
		# an unreadable file1 would look like an empty key list.
		printf("%s: cannot read %s\n", ARGV[0], join_file1) \
		    >"/dev/stderr"
		exit 1
	}
	# release the stream now that every key has been loaded
	close(join_file1)
	delete ARGV[1]
}
87 1.1 lukem
# For every record of file2: decode the first word in place, and if it is
# one of the keys loaded from file1, print the file1 line followed by the
# remainder of this record (its first field blanked out).
{
	$1 = unvis($1)
	if ($1 in words) {
		key = $1
		# emptying $1 rebuilds $0 so that it starts with the separator
		$1 = ""
		print words[key] $0
	}
}
96