1 1.25 christos /* $NetBSD: vis.c,v 1.25 2015/05/24 19:42:39 christos Exp $ */ 2 1.3 jtc 3 1.1 cgd /*- 4 1.3 jtc * Copyright (c) 1989, 1993 5 1.3 jtc * The Regents of the University of California. All rights reserved. 6 1.1 cgd * 7 1.1 cgd * Redistribution and use in source and binary forms, with or without 8 1.1 cgd * modification, are permitted provided that the following conditions 9 1.1 cgd * are met: 10 1.1 cgd * 1. Redistributions of source code must retain the above copyright 11 1.1 cgd * notice, this list of conditions and the following disclaimer. 12 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 13 1.1 cgd * notice, this list of conditions and the following disclaimer in the 14 1.1 cgd * documentation and/or other materials provided with the distribution. 15 1.8 agc * 3. Neither the name of the University nor the names of its contributors 16 1.1 cgd * may be used to endorse or promote products derived from this software 17 1.1 cgd * without specific prior written permission. 18 1.1 cgd * 19 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 1.1 cgd * SUCH DAMAGE. 30 1.1 cgd */ 31 1.1 cgd 32 1.5 lukem #include <sys/cdefs.h> 33 1.1 cgd #ifndef lint 34 1.12 lukem __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 35 1.12 lukem The Regents of the University of California. All rights reserved."); 36 1.1 cgd #endif /* not lint */ 37 1.1 cgd 38 1.1 cgd #ifndef lint 39 1.3 jtc #if 0 40 1.3 jtc static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 6/6/93"; 41 1.3 jtc #endif 42 1.25 christos __RCSID("$NetBSD: vis.c,v 1.25 2015/05/24 19:42:39 christos Exp $"); 43 1.1 cgd #endif /* not lint */ 44 1.1 cgd 45 1.1 cgd #include <stdio.h> 46 1.4 jtc #include <string.h> 47 1.4 jtc #include <stdlib.h> 48 1.21 christos #include <string.h> 49 1.17 christos #include <errno.h> 50 1.16 christos #include <wchar.h> 51 1.18 christos #include <limits.h> 52 1.4 jtc #include <unistd.h> 53 1.4 jtc #include <err.h> 54 1.1 cgd #include <vis.h> 55 1.1 cgd 56 1.13 christos #include "extern.h" 57 1.1 cgd 58 1.13 christos static int eflags, fold, foldwidth = 80, none, markeol; 59 1.13 christos #ifdef DEBUG 60 1.13 christos int debug; 61 1.13 christos #endif 62 1.19 christos static const char *extra = ""; 63 1.13 christos 64 1.13 christos static void process(FILE *); 65 1.4 jtc 66 1.4 jtc int 67 1.13 christos main(int argc, char *argv[]) 68 1.1 cgd { 69 1.1 cgd FILE *fp; 70 1.1 cgd int ch; 71 1.6 itohy int rval; 72 1.1 cgd 73 1.25 christos while ((ch = getopt(argc, argv, "bcde:F:fhlMmNnoSstw")) != -1) 74 1.1 cgd switch((char)ch) { 75 1.13 christos case 'b': 76 1.13 christos eflags |= VIS_NOSLASH; 77 1.1 cgd break; 78 1.1 cgd case 'c': 79 1.1 cgd eflags |= VIS_CSTYLE; 80 1.1 cgd break; 81 1.13 christos #ifdef DEBUG 82 1.13 christos case 'd': 83 1.13 christos debug++; 84 1.1 cgd break; 85 1.13 christos #endif 86 1.7 lukem case 'e': 87 1.7 lukem extra = optarg; 88 1.7 lukem break; 89 1.1 cgd case 'F': 90 1.13 christos if ((foldwidth = atoi(optarg)) < 5) { 91 1.4 jtc errx(1, "can't fold lines to less than 5 cols"); 92 1.4 jtc /* NOTREACHED */ 93 1.1 cgd } 94 1.13 christos markeol++; 95 1.13 christos break; 96 1.1 cgd case 'f': 97 1.1 cgd fold++; /* fold output lines to 80 cols */ 98 1.1 cgd break; /* using hidden newline */ 99 1.13 christos case 'h': 100 1.13 christos eflags |= VIS_HTTPSTYLE; 101 1.13 christos break; 102 1.1 cgd case 'l': 103 1.1 cgd markeol++; /* mark end of line with \$ */ 104 1.1 cgd break; 105 1.25 christos case 'M': 106 1.25 christos eflags |= VIS_META; 107 1.25 christos break; 108 1.13 christos case 'm': 109 1.13 christos eflags |= VIS_MIMESTYLE; 110 1.13 christos if (foldwidth == 80) 111 1.13 christos foldwidth = 76; 112 1.13 christos break; 113 1.25 christos case 'N': 114 1.25 christos eflags |= VIS_NOLOCALE; 115 1.23 christos break; 116 1.13 christos case 'n': 117 1.13 christos none++; 118 1.13 christos break; 119 1.13 christos case 'o': 120 1.13 christos eflags |= VIS_OCTAL; 121 1.13 christos break; 122 1.25 christos case 'S': 123 1.25 christos eflags |= VIS_SHELL; 124 1.25 christos break; 125 1.13 christos case 's': 126 1.13 christos eflags |= VIS_SAFE; 127 1.13 christos break; 128 1.13 christos case 't': 129 1.13 christos eflags |= VIS_TAB; 130 1.13 christos break; 131 1.13 christos case 'w': 132 1.13 christos eflags |= VIS_WHITE; 133 1.1 cgd break; 134 1.1 cgd case '?': 135 1.1 cgd default: 136 1.13 christos (void)fprintf(stderr, 137 1.25 christos "Usage: %s [-bcfhlMmNnoSstw] [-e extra]" 138 1.13 christos " [-F foldwidth] [file ...]\n", getprogname()); 139 1.13 christos return 1; 140 1.1 cgd } 141 1.13 christos 142 1.14 christos if ((eflags & (VIS_HTTPSTYLE|VIS_MIMESTYLE)) == 143 1.13 christos (VIS_HTTPSTYLE|VIS_MIMESTYLE)) 144 1.13 christos errx(1, "Can't specify -m and -h at the same time"); 145 1.13 christos 146 1.1 cgd argc -= optind; 147 1.1 cgd argv += optind; 148 1.1 cgd 149 1.6 itohy rval = 0; 150 1.6 itohy 151 1.1 cgd if (*argv) 152 1.1 cgd while (*argv) { 153 1.13 christos if ((fp = fopen(*argv, "r")) != NULL) { 154 1.13 christos process(fp); 155 1.13 christos (void)fclose(fp); 156 1.6 itohy } else { 157 1.4 jtc warn("%s", *argv); 158 1.6 itohy rval = 1; 159 1.6 itohy } 160 1.1 cgd argv++; 161 1.1 cgd } 162 1.1 cgd else 163 1.14 christos process(stdin); 164 1.13 christos return rval; 165 1.1 cgd } 166 1.1 cgd 167 1.13 christos static void 168 1.13 christos process(FILE *fp) 169 1.1 cgd { 170 1.1 cgd static int col = 0; 171 1.13 christos static char nul[] = "\0"; 172 1.13 christos char *cp = nul + 1; /* so *(cp-1) starts out != '\n' */ 173 1.21 christos wint_t c, c1, rachar; 174 1.22 christos char mbibuff[2 * MB_LEN_MAX + 1]; /* max space for 2 wchars */ 175 1.22 christos char buff[4 * MB_LEN_MAX + 1]; /* max encoding length for one char */ 176 1.20 christos int mbilen, cerr = 0, raerr = 0; 177 1.1 cgd 178 1.21 christos /* 179 1.21 christos * The input stream is considered to be multibyte characters. 180 1.21 christos * The input loop will read this data inputing one character, 181 1.21 christos * possibly multiple bytes, at a time and converting each to 182 1.21 christos * a wide character wchar_t. 183 1.21 christos * 184 1.21 christos * The vis(3) functions, however, require single either bytes 185 1.21 christos * or a multibyte string as their arguments. So we convert 186 1.21 christos * our input wchar_t and the following look-ahead wchar_t to 187 1.21 christos * a multibyte string for processing by vis(3). 188 1.21 christos */ 189 1.21 christos 190 1.21 christos /* Read one multibyte character, store as wchar_t */ 191 1.16 christos c = getwc(fp); 192 1.20 christos if (c == WEOF && errno == EILSEQ) { 193 1.21 christos /* Error in multibyte data. Read one byte. */ 194 1.17 christos c = (wint_t)getc(fp); 195 1.20 christos cerr = 1; 196 1.20 christos } 197 1.17 christos while (c != WEOF) { 198 1.21 christos /* Clear multibyte input buffer. */ 199 1.21 christos memset(mbibuff, 0, sizeof(mbibuff)); 200 1.21 christos /* Read-ahead next multibyte character. */ 201 1.22 christos if (!cerr) 202 1.22 christos rachar = getwc(fp); 203 1.22 christos if (cerr || (rachar == WEOF && errno == EILSEQ)) { 204 1.21 christos /* Error in multibyte data. Read one byte. */ 205 1.17 christos rachar = (wint_t)getc(fp); 206 1.20 christos raerr = 1; 207 1.20 christos } 208 1.1 cgd if (none) { 209 1.21 christos /* Handle -n flag. */ 210 1.1 cgd cp = buff; 211 1.1 cgd *cp++ = c; 212 1.1 cgd if (c == '\\') 213 1.1 cgd *cp++ = '\\'; 214 1.1 cgd *cp = '\0'; 215 1.1 cgd } else if (markeol && c == '\n') { 216 1.21 christos /* Handle -l flag. */ 217 1.1 cgd cp = buff; 218 1.1 cgd if ((eflags & VIS_NOSLASH) == 0) 219 1.1 cgd *cp++ = '\\'; 220 1.1 cgd *cp++ = '$'; 221 1.1 cgd *cp++ = '\n'; 222 1.1 cgd *cp = '\0'; 223 1.18 christos } else { 224 1.21 christos /* 225 1.21 christos * Convert character using vis(3) library. 226 1.21 christos * At this point we will process one character. 227 1.21 christos * But we must pass the vis(3) library this 228 1.21 christos * character plus the next one because the next 229 1.21 christos * one is used as a look-ahead to decide how to 230 1.21 christos * encode this one under certain circumstances. 231 1.21 christos * 232 1.21 christos * Since our characters may be multibyte, e.g., 233 1.21 christos * in the UTF-8 locale, we cannot use vis() and 234 1.21 christos * svis() which require byte input, so we must 235 1.21 christos * create a multibyte string and use strvisx(). 236 1.21 christos */ 237 1.21 christos /* Treat EOF as a NUL char. */ 238 1.18 christos c1 = rachar; 239 1.18 christos if (c1 == WEOF) 240 1.18 christos c1 = L'\0'; 241 1.21 christos /* 242 1.21 christos * If we hit a multibyte conversion error above, 243 1.21 christos * insert byte directly into string buff because 244 1.21 christos * wctomb() will fail. Else convert wchar_t to 245 1.21 christos * multibyte using wctomb(). 246 1.21 christos */ 247 1.20 christos if (cerr) { 248 1.21 christos *mbibuff = (char)c; 249 1.20 christos mbilen = 1; 250 1.20 christos } else 251 1.20 christos mbilen = wctomb(mbibuff, c); 252 1.21 christos /* Same for look-ahead character. */ 253 1.20 christos if (raerr) 254 1.21 christos mbibuff[mbilen] = (char)c1; 255 1.20 christos else 256 1.20 christos wctomb(mbibuff + mbilen, c1); 257 1.21 christos /* Perform encoding on just first character. */ 258 1.22 christos (void) strsenvisx(buff, 4 * MB_LEN_MAX, mbibuff, 259 1.22 christos 1, eflags, extra, &cerr); 260 1.18 christos } 261 1.1 cgd 262 1.1 cgd cp = buff; 263 1.1 cgd if (fold) { 264 1.1 cgd #ifdef DEBUG 265 1.1 cgd if (debug) 266 1.13 christos (void)printf("<%02d,", col); 267 1.1 cgd #endif 268 1.13 christos col = foldit(cp, col, foldwidth, eflags); 269 1.1 cgd #ifdef DEBUG 270 1.1 cgd if (debug) 271 1.13 christos (void)printf("%02d>", col); 272 1.1 cgd #endif 273 1.1 cgd } 274 1.1 cgd do { 275 1.13 christos (void)putchar(*cp); 276 1.1 cgd } while (*++cp); 277 1.1 cgd c = rachar; 278 1.20 christos cerr = raerr; 279 1.1 cgd } 280 1.1 cgd /* 281 1.1 cgd * terminate partial line with a hidden newline 282 1.1 cgd */ 283 1.13 christos if (fold && *(cp - 1) != '\n') 284 1.13 christos (void)printf(eflags & VIS_MIMESTYLE ? "=\n" : "\\\n"); 285 1.1 cgd } 286