dist/testdir/T.utf

#!/bin/sh

# Announce which test script is running.
printf '%s\n' 'T.utf: tests of utf functions'

# Interpreter under test: keep a value inherited from the caller, otherwise
# default to ../a.out (assign-if-unset form of parameter expansion).
: "${awk=../a.out}"

# Table-driven test driver.  The awk program below reads the test table from
# the here-document that follows it.  For every "try" line it builds a command
# that runs the test program, then feeds it each following data line: all
# fields but the last are the input and the last field is the expected output.
# Actual output goes to foo1, expected output to foo2, and the two files are
# compared with diff; a mismatch prints "test N.M failed".
#
# The interpreter under test is handed to the driver with -v, so overriding
# the shell variable awk changes both the driver and the test programs it
# spawns (previously the spawned programs always ran ../a.out).
$awk -v awk="$awk" '
BEGIN {
	FS = "\t"	# table columns are tab-separated
}
NF == 0 || $1 ~ /^#/ {	# blank lines and comments between sequences
	next
}
$1 ~ /try/ {	# new test
	nt++
	sub(/try [a-zA-Z_0-9]+ /, "")	# strip the keyword and the test label
	prog = $0
	printf("try %3d %s\n", nt, prog)
	# command prefix: interpreter, tab field separator, quoted program
	prog = sprintf("%s -F\"\\t\" '"'"'%s'"'"'", awk, prog)
	# print "prog is", prog
	nt2 = 0
	while (getline > 0) {
		if (NF == 0)	# blank line terminates a sequence
			break
		input = $1
		for (i = 2; i < NF; i++)	# input data
			input = input "\t" $i
		# actual output of the test program goes to foo1
		test = sprintf("./echo '"'"'%s'"'"' | %s >foo1; ",
			input, prog)
		# expected output goes to foo2; "" means an empty file
		if ($NF == "\"\"")
			output = ">foo2;"
		else
			output = sprintf("./echo '"'"'%s'"'"' >foo2; ", $NF)
		# expand the \t and \n notation used in the expected column
		gsub(/\\t/, "\t", output)
		gsub(/\\n/, "\n", output)
		run = sprintf("diff foo1 foo2 || echo test %d.%d failed",
			nt, ++nt2)
		# print  "input is", input
		# print  "test is", test
		# print  "output is", output
		# print  "run is", run
		system(test output run)
	}
	tt += nt2	# running total of data lines executed
}
END { print tt, "tests" }
' <<\!!!!
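# General format:
# try NAME program   (NAME is a test label; the program is the rest of the line)
# $1	$2	$3	output1  (in the output column: \t for tab, \n for newline,
# $1	$2	$3	output2   and "" for empty output)
# ... each sequence is terminated by a blank line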

# try another program...

try length { print length($1) }
	0
a	1
	7
	6
	6
	6
	6
	1
 finger	8
	4
	3
	2
	5
	3
	4
	7
	3
	2
	3
	6
	3
	6

try index { print index($1, $2) }
abc	a	1
abc	b	2
abc	x	0
	""	0
		6
		2
	x	0
x	x	2
 finger	f	3
 finger	r	8

try substr { print substr($0, 2, 3) }
abcdef	bcd


xy	y

try rematch { print $1 ~ $2 }
abc	a	1
abc	x	0
		1
	.*	1
	.*	0
	+	1

# replace first occurrence of $2 by $3 in $1
try sub { n = sub($2, $3, $1); print n, $1 }
abcdef	bc	XYZ	1 aXYZdef
abcdef	xy	XYZ	0 abcdef
		NO	1 NO
 finger	.*g	FING	1 FINGer
	.	x	1 x

# replace all occurrences of $2 by $3 in $1
try gsub { n = gsub($2, $3, $1); print n, $1 }
abcdef	bc	XYZ	1 aXYZdef
abcdef	xy	XYZ	0 abcdef
		NO	2 NONO
 finger	.*g	FING	1 FINGer
	.	x	6 xxxxxx

try match { print match($1, $2), RSTART, RLENGTH }
abc	[^a]	2 2 1
abc	[^ab]	3 3 1
	[^]	2 2 1
	[^-]	5 5 1
abc	a	1 1 1
abc	x	0 0 -1
		4 4 1
	.*	4 4 4
	.*	0 0 -1
	+	2 2 1
	x+	0 0 -1
	.	2 2 2
	[]	4 4 1
	[-]	0 0 -1
	[^-]	1 1 1
  	[-]	2 2 1
  	[-]+	2 2 3
xxx  	[-]	4 4 1
    	.*	3 3 15
   	[^]	2 2 1

# FS="" should split into unicode chars
try emptyFS BEGIN {FS=""} {print NF}
	10
	7
	6
	6
	6
	6
	1
 finger	8

# printf(%N.Ns) for utf8 strings
try printfs1 {printf("[%5.2s][%-5.2s]\n"), $1, $1}
abcd	[   ab][ab   ]
abc	[   ][   ]
abc	[   ][   ]
abc	[   a][a   ]
	[   ][   ]
xyz	[   ][   ]
	[    ][    ]

# printf(%Ns) for utf8 strings
try printfs2 {printf("[%5s][%-5s]\n"), $1, $1}
abcd	[ abcd][abcd ]
ab	[ ab][ab ]
aab	[aab][aab]
aabc	[aabc][aabc]
ab	[ ab][ab ]
abc	[ abc][abc ]
	[][]
	[    ][    ]

# printf(%.Ns) for utf8 strings
try printfs3 {printf("[%.2s][%-.2s]\n"), $1, $1}
abcd	[ab][ab]
abc	[][]
abc	[][]
	[][]
	[][]

# printf(%c) for utf
try printfc {printf("%c %c\n", $1, substr($1,2,1))}


!!!!