# $NetBSD: join.awk,v 1.7 2019/10/24 16:52:11 rhialto Exp $
#
# Copyright (c) 2002 The NetBSD Foundation, Inc.
# All rights reserved.
#
# This code is derived from software contributed to The NetBSD Foundation
# by Luke Mewburn of Wasabi Systems.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# join.awk F1 F2
#	Similar to join(1), this reads a list of words from F1
#	and outputs lines in F2 with a first word that is in F1.
#	For purposes of matching the first word, both instances are
#	canonicalised via unvis(word).
#	Neither file needs to be sorted.
#
#	Each output line is the line stored from F1 followed by the F2
#	line with its first field blanked (so the remaining F2 fields are
#	re-joined with OFS).
#	NOTE(review): this header used to state that the F2 spelling of
#	the first word is printed; the code prints F1's -- confirm which
#	was intended before changing either.
#

# unvis(s)
#	Return s with a subset of vis(3) escapes decoded:
#	  \\	-> a single backslash
#	  \ooo	-> the character with octal code ooo (three digits)
#	Any other "\x" pair produces a warning on stderr and is kept
#	literally.  A lone backslash at the very end of s is copied
#	unchanged, because the search pattern requires a following
#	character.
#
#	"result" and "pos" are local scratch variables (declared as extra
#	parameters, the awk idiom); callers pass only s.
#
# XXX: We don't handle the complete range of vis encodings
function unvis(s,	result, pos)
{
	result = ""
	while (length(s) > 0) {
		# find the next backslash that is followed by any character
		pos = match(s, "\\\\.")
		if (pos == 0) {
			# no escapes left: copy the remainder verbatim
			result = result "" s
			s = ""
			break
		}
		# copy the part before the next backslash
		result = result "" substr(s, 1, pos - 1)
		s = substr(s, pos)
		# process the backslash and next few chars
		if (substr(s, 1, 2) == "\\\\") {
			# double backslash -> single backslash
			result = result "\\"
			s = substr(s, 3)
		} else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
			# \ooo with three octal digits.
			# XXX: use strtonum() if that is available
			result = result "" sprintf("%c", \
				0 + substr(s, 2, 1) * 64 + \
				0 + substr(s, 3, 1) * 8 + \
				0 + substr(s, 4, 1))
			s = substr(s, 5)
		} else {
			# unrecognised escape: keep the literal backslash
			# NOTE(review): FILENAME/FNR describe the main input
			# (F2); while F1 is being loaded from BEGIN they are
			# presumably still unset, so the location shown for
			# F1 problems is not meaningful.
			printf "%s: %s:%s: unrecognised escape %s\n", \
				ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
				substr(s, 1, 2) \
				>"/dev/stderr"
			result = result "" substr(s, 1, 1)
			s = substr(s, 2)
		}
	}
	return result
}

# Load F1: index every line by its canonicalised first word, then remove
# F1 from ARGV so that only F2 is processed by the main rules below.
BEGIN {
	if (ARGC != 3) {
		printf("Usage: join file1 file2\n") >"/dev/stderr"
		exit 1
	}
	while ((f1_status = (getline < ARGV[1])) > 0) {
		f1 = unvis($1)
		words[f1] = $0
	}
	# getline yields 0 at end of file and -1 on error; do not mistake
	# an unreadable F1 for an empty one.
	if (f1_status < 0) {
		printf("%s: cannot read %s\n", ARGV[0], ARGV[1]) >"/dev/stderr"
		exit 1
	}
	close(ARGV[1])
	delete ARGV[1]
}

# Main input (F2): canonicalise the first word of every line ...
{ f1 = unvis($1) }

# ... and for lines whose first word was listed in F1, print the stored
# F1 line followed by the rest of the F2 line.
f1 in words {
	# blanking $1 rebuilds $0 as OFS followed by the remaining fields
	$1 = ""
	print words[f1] $0
}