#!/bin/sh
# T.utf: table-driven tests of awk functions on utf input.
#
# Usage: [awk=/path/to/awk] ./T.utf
#   awk   interpreter under test (default: ../a.out).  The same value runs
#         this driver and every generated test command.
#
# The test table is the here-document that follows the awk program.
# Each case is run through the shell as
#   ./echo INPUT | $awk -F"\t" PROGRAM >foo1
# and foo1 is compared with the expected output stored in foo2.
# foo1 and foo2 are created in the current directory and left in place.

echo T.utf: tests of utf functions

awk=${awk-../a.out}

# shellcheck disable=SC2086 # $awk stays unquoted so it may carry options
$awk -v awk="$awk" '
BEGIN {
	# awk (the interpreter used for each test command) arrives via -v,
	# so it always agrees with the interpreter running this driver.
	FS = "\t"
}
# Blank lines and comment lines outside a test group are skipped.
NF == 0 || $1 ~ /^#/ {
	next
}
$1 ~ /try/ {	# new test
	nt++
	# Drop the "try NAME " prefix; the rest of the line is the program.
	sub(/try [a-zA-Z_0-9]+ /, "")
	prog = $0
	printf("try %3d %s\n", nt, prog)
	prog = sprintf("%s -F\"\\t\" '"'"'%s'"'"'", awk, prog)
	# print "prog is", prog
	nt2 = 0
	while (getline > 0) {
		if (NF == 0)	# blank line terminates a sequence
			break
		# Every field except the last is input; the last field is
		# the expected output.
		input = $1
		for (i = 2; i < NF; i++)	# input data
			input = input "\t" $i
		test = sprintf("./echo '"'"'%s'"'"' | %s >foo1; ",
			input, prog)
		if ($NF == "\"\"")	# "" in the table means empty output
			output = ">foo2;"
		else
			output = sprintf("./echo '"'"'%s'"'"' >foo2; ", $NF)
		# Expand the \t and \n escapes written in the expected output.
		gsub(/\\t/, "\t", output)
		gsub(/\\n/, "\n", output)
		run = sprintf("diff foo1 foo2 || echo test %d.%d failed",
			nt, ++nt2)
		# print "input is", input
		# print "test is", test
		# print "output is", output
		# print "run is", run
		system(test output run)
	}
	tt += nt2
}
END { print tt, "tests" }
' <<\!!!!
# General format (fields are separated by tabs):
# try NAME program as rest of line
# $1 $2 $3 output1	(in the output field: \t for tab, \n for newline,
# $1 $2 $3 output2	 "" for null)
# ... terminated by blank line
54
55 # try another program...
56
57 try length { print length($1) }
58 0
59 a 1
60 7
61 6
62 6
63 6
64 6
65 1
66 finger 8
67 4
68 3
69 2
70 5
71 3
72 4
73 7
74 3
75 2
76 3
77 6
78 3
79 6
80
81 try index { print index($1, $2) }
82 abc a 1
83 abc b 2
84 abc x 0
85 "" 0
86 6
87 2
88 x 0
89 x x 2
90 finger f 3
91 finger r 8
92
93 try substr { print substr($0, 2, 3) }
94 abcdef bcd
95
96
97
98 xy y
99
100 try rematch { print $1 ~ $2 }
101 abc a 1
102 abc x 0
103 1
104 .* 1
105 .* 0
106 + 1
107
108 # replace first occurrence of $2 by $3 in $1
109 try sub { n = sub($2, $3, $1); print n, $1 }
110 abcdef bc XYZ 1 aXYZdef
111 abcdef xy XYZ 0 abcdef
112 NO 1 NO
113 finger .*g FING 1 FINGer
114 . x 1 x
115
116 # replace all occurrences of $2 by $3 in $1
117 try gsub { n = gsub($2, $3, $1); print n, $1 }
118 abcdef bc XYZ 1 aXYZdef
119 abcdef xy XYZ 0 abcdef
120 NO 2 NONO
121 finger .*g FING 1 FINGer
122 . x 6 xxxxxx
123
124 try match { print match($1, $2), RSTART, RLENGTH }
125 abc [^a] 2 2 1
126 abc [^ab] 3 3 1
127 [^] 2 2 1
128 [^-] 5 5 1
129 abc a 1 1 1
130 abc x 0 0 -1
131 4 4 1
132 .* 4 4 4
133 .* 0 0 -1
134 + 2 2 1
135 x+ 0 0 -1
136 . 2 2 2
137 [] 4 4 1
138 [-] 0 0 -1
139 [^-] 1 1 1
140 [-] 2 2 1
141 [-]+ 2 2 3
142 xxx [-] 4 4 1
143 .* 3 3 15
144 [^] 2 2 1
145
146 # FS="" should split into unicode chars
147 try emptyFS BEGIN {FS=""} {print NF}
148 10
149 7
150 6
151 6
152 6
153 6
154 1
155 finger 8
156
157 # printf(%N.Ns) for utf8 strings
158 try printfs1 {printf("[%5.2s][%-5.2s]\n"), $1, $1}
159 abcd [ ab][ab ]
160 abc [ ][ ]
161 abc [ ][ ]
162 abc [ a][a ]
163 [ ][ ]
164 xyz [ ][ ]
165 [ ][ ]
166
# printf(%Ns) for utf8 strings (field width only, no precision)
168 try printfs2 {printf("[%5s][%-5s]\n"), $1, $1}
169 abcd [ abcd][abcd ]
170 ab [ ab][ab ]
171 aab [aab][aab]
172 aabc [aabc][aabc]
173 ab [ ab][ab ]
174 abc [ abc][abc ]
175 [][]
176 [ ][ ]
177
# printf(%.Ns) for utf8 strings (precision only, no field width)
179 try printfs3 {printf("[%.2s][%-.2s]\n"), $1, $1}
180 abcd [ab][ab]
181 abc [][]
182 abc [][]
183 [][]
184 [][]
185
186 # printf(%c) for utf
187 try printfc {printf("%c %c\n", $1, substr($1,2,1))}
188
189
190
191
192
193
194 !!!!
195