Home | History | Annotate | Line # | Download | only in sets
join.awk revision 1.6.16.1
      1  1.6.16.1  martin #	$NetBSD: join.awk,v 1.6.16.1 2020/04/13 07:45:31 martin Exp $
      2       1.1   lukem #
      3       1.1   lukem # Copyright (c) 2002 The NetBSD Foundation, Inc.
      4       1.1   lukem # All rights reserved.
      5       1.1   lukem #
      6       1.1   lukem # This code is derived from software contributed to The NetBSD Foundation
      7       1.1   lukem # by Luke Mewburn of Wasabi Systems.
      8       1.1   lukem #
      9       1.1   lukem # Redistribution and use in source and binary forms, with or without
     10       1.1   lukem # modification, are permitted provided that the following conditions
     11       1.1   lukem # are met:
     12       1.1   lukem # 1. Redistributions of source code must retain the above copyright
     13       1.1   lukem #    notice, this list of conditions and the following disclaimer.
     14       1.1   lukem # 2. Redistributions in binary form must reproduce the above copyright
     15       1.1   lukem #    notice, this list of conditions and the following disclaimer in the
     16       1.1   lukem #    documentation and/or other materials provided with the distribution.
     17       1.1   lukem #
     18       1.1   lukem # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19       1.1   lukem # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20       1.1   lukem # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21       1.1   lukem # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22       1.1   lukem # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23       1.1   lukem # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24       1.1   lukem # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25       1.1   lukem # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26       1.1   lukem # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27       1.1   lukem # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28       1.1   lukem # POSSIBILITY OF SUCH DAMAGE.
     29       1.1   lukem #
     30       1.1   lukem # join.awk F1 F2
     31       1.1   lukem #	Similar to join(1), this reads a list of words from F1
     32       1.1   lukem #	and outputs lines in F2 with a first word that is in F1.
     33  1.6.16.1  martin #	For purposes of matching the first word, both instances are
     34  1.6.16.1  martin #	canonicalised via unvis(word); the version from F2 is printed.
     35  1.6.16.1  martin #	Neither file needs to be sorted.
     36       1.1   lukem 
function unvis(s) \
{
	# Decode the subset of vis(3) escapes that this script understands.
	#   s        the encoded word
	#   returns  s with "\\" collapsed to a single backslash and "\ooo"
	#            (exactly three octal digits) replaced by the character
	#            with that code
	# Any other backslash sequence is reported on stderr and its
	# backslash is copied through unchanged.  A backslash that is the
	# very last character is copied through without a warning.
	# Working state is kept in the globals unvis_result and unvis_pos.
	# XXX: We don't handle the complete range of vis encodings
	unvis_result = ""
	while ((unvis_pos = match(s, "\\\\.")) > 0) {
		# text ahead of the backslash is literal: move it to the output
		unvis_result = unvis_result "" substr(s, 1, unvis_pos - 1)
		s = substr(s, unvis_pos)

		# s now starts with the backslash; classify what follows it
		if (substr(s, 1, 2) == "\\\\") {
			# escaped backslash: emit one, consume two
			unvis_result = unvis_result "\\"
			s = substr(s, 3)
			continue
		}
		if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
			# \ooo: build the character code from its three octal
			# digits, most significant first.
			# XXX: use strtonum() if that is available
			unvis_result = unvis_result "" sprintf("%c", \
				(substr(s, 2, 1) * 8 + substr(s, 3, 1)) * 8 + \
				substr(s, 4, 1))
			s = substr(s, 5)
			continue
		}

		# anything else: complain, then pass only the backslash through
		# so that the following character is handled as ordinary text
		printf("%s: %s:%s: unrecognised escape %s\n", \
			ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
			substr(s, 1, 2)) >"/dev/stderr"
		unvis_result = unvis_result "" substr(s, 1, 1)
		s = substr(s, 2)
	}
	# whatever is left contains no backslash followed by a character
	unvis_result = unvis_result "" s
	return unvis_result
}
     76       1.4     apb 
BEGIN \
{
	# Exactly two operands (F1 and F2) must follow the program name.
	if (ARGC != 3) {
		printf("Usage: join file1 file2\n") >"/dev/stderr"
		exit 1
	}

	# Read F1 here, record by record, remembering each line under the
	# canonical (unvis-decoded) form of its first word.
	for (;;) {
		if ((getline < ARGV[1]) <= 0)
			break
		f1 = unvis($1)
		words[f1] = $0
	}

	# Drop F1 from the operand list so that the main rules only see F2.
	delete ARGV[1]
}
     89       1.1   lukem 
# For every record of F2, canonicalise its first word so that it can be
# compared with the canonical keys collected from F1.
{ f1 = unvis($1) }

# The first word also occurs in F1: output the F2 line.
# The record is printed untouched so that, as the description at the top of
# this file states, the F2 spelling of the first word is the one that is
# output.  Blanking $1 and prefixing words[f1] would instead emit the text
# read from F1 and rebuild $0 with single-space separators.
f1 in words \
{
	print
}
     97