Home | History | Annotate | Line # | Download | only in sets
      1 #	$NetBSD: join.awk,v 1.7 2019/10/24 16:52:11 rhialto Exp $
      2 #
      3 # Copyright (c) 2002 The NetBSD Foundation, Inc.
      4 # All rights reserved.
      5 #
      6 # This code is derived from software contributed to The NetBSD Foundation
      7 # by Luke Mewburn of Wasabi Systems.
      8 #
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions
     11 # are met:
     12 # 1. Redistributions of source code must retain the above copyright
     13 #    notice, this list of conditions and the following disclaimer.
     14 # 2. Redistributions in binary form must reproduce the above copyright
     15 #    notice, this list of conditions and the following disclaimer in the
     16 #    documentation and/or other materials provided with the distribution.
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19 # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21 # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28 # POSSIBILITY OF SUCH DAMAGE.
     29 #
     30 # join.awk F1 F2
     31 #	Similar to join(1), this reads lines from F1 and, for each line
     32 #	in F2 whose first word matches the first word of a line in F1,
     33 #	outputs the F1 line followed by the remaining F2 fields.  For
     34 #	matching, both first words are canonicalised via unvis(word).
     35 #	Neither file needs to be sorted.
     36 
# unvis(s)
#	Return s with the backslash escapes it contains decoded:
#	    \\    -> a single backslash
#	    \ooo  -> the character whose code is the octal number ooo
#	             (exactly three octal digits)
#	Any other backslash sequence is reported on stderr and its backslash
#	is copied through literally.  A backslash that is the very last
#	character of s is copied through without a warning.
#
#	result and pos are local working variables (awk's extra-parameter
#	idiom); callers pass only s.
#
function unvis(s,    result, pos) \
{
	# XXX: We don't handle the complete range of vis encodings
	result = ""
	while (length(s) > 0) {
		# locate the next backslash that is followed by another char
		pos = match(s, "\\\\.")
		if (pos == 0) {
			# no escapes left: the remainder is copied verbatim
			result = result "" s
			s = ""
			break
		}
		# copy the part before the next backslash
		result = result "" substr(s, 1, pos - 1)
		s = substr(s, pos)
		# process the backslash and next few chars
		if (substr(s, 1, 2) == "\\\\") {
			# double backslash -> single backslash
			result = result "\\"
			s = substr(s, 3)
		} else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
			# \ooo with three octal digits.
			# XXX: use strtonum() if that is available
			result = result "" sprintf("%c", \
				0+substr(s, 2, 1) * 64 + \
				0+substr(s, 3, 1) * 8 + \
				0+substr(s, 4, 1))
			s = substr(s, 5)
		} else {
			# unrecognised escape: warn, keep the literal backslash
			# and resume scanning at the character after it
			printf "%s: %s:%s: unrecognised escape %s\n", \
				ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
				substr(s, 1, 2) \
				>"/dev/stderr"
			result = result "" substr(s, 1, 1)
			s = substr(s, 2)
		}
	}
	return result
}
     76 
# Start-up: validate the command line, then load F1 (ARGV[1]) into words[],
# keyed by the unvis()-decoded first word of each line, with the whole F1
# line as the value.  ARGV[1] is then removed so that the main rules only
# read F2.
BEGIN \
{
	if (ARGC != 3) {
		printf("Usage: join file1 file2\n") >"/dev/stderr"
		exit 1
	}
	# keep getline's status so that a failure (-1) can be told apart
	# from a normal end of file (0)
	while ((rc = (getline < ARGV[1])) > 0) {
		f1 = unvis($1)
		words[f1] = $0
	}
	if (rc < 0) {
		# F1 could not be opened or read; without this check it would
		# silently be treated as an empty word list
		printf("%s: cannot read %s\n", ARGV[0], ARGV[1]) >"/dev/stderr"
		exit 1
	}
	close(ARGV[1])
	# stop awk from processing F1 again as a main input file
	delete ARGV[1]
}
     89 
# For every line of F2: decode its first word and, when that word was seen
# in F1, print the stored F1 line followed by the rest of the F2 line.
{
	f1 = unvis($1)
	if (f1 in words) {
		# blanking $1 makes awk rebuild $0 from the fields, so $0
		# becomes an empty first field plus the remaining fields,
		# each preceded by OFS
		$1 = ""
		print words[f1] $0
	}
}
     97