1 # $NetBSD: join.awk,v 1.7 2019/10/24 16:52:11 rhialto Exp $ 2 # 3 # Copyright (c) 2002 The NetBSD Foundation, Inc. 4 # All rights reserved. 5 # 6 # This code is derived from software contributed to The NetBSD Foundation 7 # by Luke Mewburn of Wasabi Systems. 8 # 9 # Redistribution and use in source and binary forms, with or without 10 # modification, are permitted provided that the following conditions 11 # are met: 12 # 1. Redistributions of source code must retain the above copyright 13 # notice, this list of conditions and the following disclaimer. 14 # 2. Redistributions in binary form must reproduce the above copyright 15 # notice, this list of conditions and the following disclaimer in the 16 # documentation and/or other materials provided with the distribution. 17 # 18 # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 # POSSIBILITY OF SUCH DAMAGE. 29 # 30 # join.awk F1 F2 31 # Similar to join(1), this reads a list of words from F1 32 # and outputs lines in F2 with a first word that is in F1. 33 # For purposes of matching the first word, both instances are 34 # canonicalised via unvis(word); the version from F2 is printed. 35 # Neither file needs to be sorted. 
# unvis(s)
#	Return s with the vis-style escapes this script understands decoded:
#	"\\" becomes a single backslash and "\ooo" (exactly three octal
#	digits) becomes the character with that octal code.  Any other
#	backslash sequence is reported on stderr and its backslash is kept
#	literally.  A backslash that is the very last character of s is
#	copied through unchanged, without a diagnostic.
#	result and pos are local variables, not arguments; callers pass
#	only s.
#	XXX: We don't handle the complete range of vis encodings
function unvis(s,	result, pos) \
{
	result = ""
	while (length(s) > 0) {
		# find the next backslash that is followed by another character
		pos = match(s, "\\\\.")
		if (pos == 0) {
			# no escapes left: the remainder is literal text
			result = result "" s
			break
		}
		# copy the part before the next backslash
		result = result "" substr(s, 1, pos - 1)
		s = substr(s, pos)
		# process the backslash and next few chars
		if (substr(s, 1, 2) == "\\\\") {
			# double backslash -> single backslash
			result = result "\\"
			s = substr(s, 3)
		} else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
			# \ooo with three octal digits.
			# XXX: use strtonum() if that is available
			result = result "" sprintf("%c", \
				substr(s, 2, 1) * 64 + \
				substr(s, 3, 1) * 8 + \
				substr(s, 4, 1))
			s = substr(s, 5)
		} else {
			# unrecognised escape: keep the literal backslash
			printf "%s: %s:%s: unrecognised escape %s\n", \
				ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
				substr(s, 1, 2) \
				>"/dev/stderr"
			result = result "" substr(s, 1, 1)
			s = substr(s, 2)
		}
	}
	return result
}

# Read all of F1 (ARGV[1]) up front and remember the canonicalised first
# word of every line as a key of words[].  ARGV[1] is then deleted so that
# the main rules below only see F2.
BEGIN \
{
	if (ARGC != 3) {
		printf("Usage: join file1 file2\n") >"/dev/stderr"
		exit 1
	}
	join_f1 = ARGV[1]
	# "getline < file" sets $0 and NF but neither FILENAME nor FNR, so
	# unvis() diagnostics raised in this loop are not labelled with F1.
	while ((join_status = (getline < join_f1)) > 0) {
		f1 = unvis($1)
		# only the key is consulted by the rules below
		words[f1] = $0
	}
	if (join_status < 0) {
		# getline yields -1 when F1 cannot be opened or read; do not
		# mistake that for an empty word list.
		printf("%s: cannot read %s\n", ARGV[0], join_f1) >"/dev/stderr"
		exit 1
	}
	close(join_f1)
	delete ARGV[1]
}

# Canonicalise the first word of every F2 line for the lookup below.
{ f1 = unvis($1) }

# Output the F2 lines whose canonicalised first word was seen in F1.
# The line is printed exactly as read: assigning to $1 would rebuild $0
# with OFS (collapsing the original whitespace), and printing words[f1]
# would emit F1's spelling of the word instead of F2's.
f1 in words \
{
	print
}