bdftruncate.c revision 89d09728
189d09728Smrg/*-
289d09728Smrg * Copyright (c) 2006 Martin Husemann.
389d09728Smrg * Copyright (c) 2007 Joerg Sonnenberger.
489d09728Smrg * All rights reserved.
589d09728Smrg *
689d09728Smrg * Redistribution and use in source and binary forms, with or without
789d09728Smrg * modification, are permitted provided that the following conditions
889d09728Smrg * are met:
989d09728Smrg * 1. Redistributions of source code must retain the above copyright
1089d09728Smrg *    notice, this list of conditions and the following disclaimer.
1189d09728Smrg * 2. The name of the author may not be used to endorse or promote
1289d09728Smrg *    products derived from this software without specific prior
1389d09728Smrg *    written permission.
1489d09728Smrg *
1589d09728Smrg * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
1689d09728Smrg * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
1789d09728Smrg * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
1889d09728Smrg * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
1989d09728Smrg * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2089d09728Smrg * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2189d09728Smrg * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2289d09728Smrg * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2389d09728Smrg * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2489d09728Smrg * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2589d09728Smrg * POSSIBILITY OF SUCH DAMAGE.
2689d09728Smrg */
2789d09728Smrg
2889d09728Smrg/*
2989d09728Smrg * This program is derived (in a straight forward way) from
3089d09728Smrg * bdftruncate.pl -- Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/>
3189d09728Smrg *
3289d09728Smrg * This utility allows you to generate from an ISO10646-1 encoded
3389d09728Smrg * BDF font other ISO10646-1 BDF fonts in which all characters above
3489d09728Smrg * a threshold code value are stored unencoded.
3589d09728Smrg */
3689d09728Smrg
3789d09728Smrg#include <ctype.h>
3889d09728Smrg#include <errno.h>
3989d09728Smrg#include <limits.h>
4089d09728Smrg#include <stdio.h>
4189d09728Smrg#include <stdlib.h>
4289d09728Smrg#include <string.h>
4389d09728Smrg
/* Forward declarations for helpers that are defined after main(). */
static int iswide(unsigned int);
static void usage(void);

/* Set to 1 by main() when "-w" is the first command-line argument. */
static int opt_minus_w;
/* Set to 1 by main() when "+w" is the first command-line argument. */
static int opt_plus_w;
/*
 * Read by process_line(): when non-zero, glyphs whose code satisfies
 * iswide() are written as "ENCODING -1" even below the threshold.
 */
static int removewide;
/*
 * Written by parse_threshold(); process_line() unencodes every glyph
 * whose ENCODING value is >= this number.
 */
static unsigned long threshold;
5189d09728Smrg
5289d09728Smrgstatic int
5389d09728Smrgparse_threshold(const char *str)
5489d09728Smrg{
5589d09728Smrg	int base;
5689d09728Smrg	char *end_ptr;
5789d09728Smrg
5889d09728Smrg	if (!isdigit((unsigned char)*str))
5989d09728Smrg		return 1;
6089d09728Smrg	if (str[0] == 0 && (str[1] == 'x' || str[1] == 'X'))
6189d09728Smrg		base = 16;
6289d09728Smrg	else
6389d09728Smrg		base = 10;
6489d09728Smrg
6589d09728Smrg	errno = 0;
6689d09728Smrg	threshold = strtoul(str, &end_ptr, base);
6789d09728Smrg	if (errno != 0 || threshold == 0)
6889d09728Smrg		return 1;
6989d09728Smrg	return 0;
7089d09728Smrg}
7189d09728Smrg
7289d09728Smrgstatic void
7389d09728Smrgprocess_line(const char *line)
7489d09728Smrg{
7589d09728Smrg	if (strncmp(line, "ENCODING", 8) == 0) {
7689d09728Smrg		unsigned long enc;
7789d09728Smrg		const char *v;
7889d09728Smrg
7989d09728Smrg		v = line + 9;
8089d09728Smrg
8189d09728Smrg		while (*v && isspace((unsigned char)(*v)))
8289d09728Smrg			++v;
8389d09728Smrg		enc = strtoul(v, NULL, 10);
8489d09728Smrg		/* XXX Check for line-ending? */
8589d09728Smrg		if (enc >= threshold || (removewide && iswide(enc))) {
8689d09728Smrg			printf("ENCODING -1\n");
8789d09728Smrg		} else {
8889d09728Smrg			fputs(line, stdout);
8989d09728Smrg		}
9089d09728Smrg		return;
9189d09728Smrg	}
9289d09728Smrg	if (strncmp(line, "STARTFONT", 9) == 0) {
9389d09728Smrg		fputs(line, stdout);
9489d09728Smrg		printf("COMMENT AUTOMATICALLY GENERATED FILE. DO NOT EDIT!\n"
9589d09728Smrg		    "COMMENT In this version of the font file, "
9689d09728Smrg		    "all characters >= U+%04lx are\n"
9789d09728Smrg		    "COMMENT not encoded to keep XFontStruct small.\n",
9889d09728Smrg		    threshold);
9989d09728Smrg		return;
10089d09728Smrg	}
10189d09728Smrg	if (strncmp(line, "COMMENT", 7) == 0) {
10289d09728Smrg		const char *v = line + 8;
10389d09728Smrg
10489d09728Smrg		while (*v && isspace((unsigned char)(*v)))
10589d09728Smrg			v++;
10689d09728Smrg		if (strncmp(v, "$id: ", 5) == 0 ||
10789d09728Smrg		    strncmp(v, "$Id: ", 5) == 0) {
10889d09728Smrg		    	const char *id = strchr(v+1, '$');
10989d09728Smrg		    	if (id) {
11089d09728Smrg		    		printf("COMMENT Derived from %.*s",
11189d09728Smrg				     (int)(id - v - 4), v + 5);
11289d09728Smrg				return;
11389d09728Smrg		    	}
11489d09728Smrg		}
11589d09728Smrg	}
11689d09728Smrg	fputs(line, stdout);
11789d09728Smrg}
11889d09728Smrg
11989d09728Smrgint
12089d09728Smrgmain(int argc, char **argv)
12189d09728Smrg{
12289d09728Smrg	int removewide;
12389d09728Smrg	char *line, *input_ptr;
12489d09728Smrg	size_t line_len, rest_len;
12589d09728Smrg
12689d09728Smrg	--argc;
12789d09728Smrg	++argv;
12889d09728Smrg	if (argc == 0)
12989d09728Smrg		usage();
13089d09728Smrg
13189d09728Smrg	if (strcmp(*argv, "-w") == 0 || strcmp(*argv, "+w") == 0) {
13289d09728Smrg		if (**argv == '-')
13389d09728Smrg			opt_minus_w = 1;
13489d09728Smrg		else
13589d09728Smrg			opt_plus_w = 1;
13689d09728Smrg		--argc;
13789d09728Smrg		++argv;
13889d09728Smrg	}
13989d09728Smrg
14089d09728Smrg	if (argc != 1 || (opt_plus_w && opt_minus_w))
14189d09728Smrg		usage();
14289d09728Smrg	if (parse_threshold(*argv)) {
14389d09728Smrg		fprintf(stderr, "Illegal threshold %s", *argv);
14489d09728Smrg		usage();
14589d09728Smrg	}
14689d09728Smrg
14789d09728Smrg	if (opt_minus_w)
14889d09728Smrg		removewide = 1;
14989d09728Smrg	else if (opt_plus_w)
15089d09728Smrg		removewide = 0;
15189d09728Smrg	else
15289d09728Smrg		removewide = (threshold <= 0x3200);
15389d09728Smrg
15489d09728Smrg	line_len = 1024;
15589d09728Smrg	if ((line = malloc(line_len)) == NULL) {
15689d09728Smrg		fprintf(stderr, "malloc failed");
15789d09728Smrg		exit(EXIT_FAILURE);
15889d09728Smrg	}
15989d09728Smrg
16089d09728Smrg	for (;;) {
16189d09728Smrg		if (fgets(line, line_len, stdin) == NULL)
16289d09728Smrg		     break;
16389d09728Smrg		while (strlen(line) == line_len - 1 && !feof(stdin)) {
16489d09728Smrg			if (line_len > SSIZE_MAX) {
16589d09728Smrg				fprintf(stderr, "input line too large");
16689d09728Smrg				exit(EXIT_FAILURE);
16789d09728Smrg			}
16889d09728Smrg			line = realloc(line, line_len * 2);
16989d09728Smrg			if (line == NULL) {
17089d09728Smrg				fprintf(stderr, "realloc failed");
17189d09728Smrg				exit(EXIT_FAILURE);
17289d09728Smrg			}
17389d09728Smrg			input_ptr = line + line_len - 1;
17489d09728Smrg			rest_len = line_len + 1;
17589d09728Smrg			line_len *= 2;
17689d09728Smrg			if (fgets(input_ptr, rest_len, stdin) == NULL) {
17789d09728Smrg				/* Should not happen, but handle as EOF */
17889d09728Smrg				break;
17989d09728Smrg			}
18089d09728Smrg		}
18189d09728Smrg		process_line(line);
18289d09728Smrg	}
18389d09728Smrg
18489d09728Smrg	return EXIT_SUCCESS;
18589d09728Smrg}
18689d09728Smrg
/*
 * Report whether the ISO 10646/Unicode code point ucs is treated as
 * East Asian Wide (W) or East Asian FullWidth (F); the comment this
 * replaces cites Unicode Technical Report #11 for these categories.
 *
 * Returns 1 for a wide or fullwidth code point and 0 otherwise.
 */
static int
iswide(unsigned int ucs)
{
	static const struct {
		unsigned int first;
		unsigned int last;
	} wide[] = {
		{ 0x1100, 0x115f },	/* Hangul Jamo */
		{ 0x2e80, 0xa4cf },	/* CJK .. Yi */
		{ 0xac00, 0xd7a3 },	/* Hangul Syllables */
		{ 0xf900, 0xfaff },	/* CJK Comp. Ideographs */
		{ 0xfe30, 0xfe6f },	/* CJK Comp. Forms */
		{ 0xff00, 0xff5f },	/* Fullwidth Forms */
		{ 0xffe0, 0xffe6 },
		{ 0x20000, 0x2ffff }
	};
	unsigned int i;

	/*
	 * Holes inside the CJK .. Yi range: 0x300a, 0x300b, 0x301a and
	 * 0x301b (selected by the mask) plus 0x303f.  None of them lies
	 * in any other table entry, so they can be rejected up front.
	 */
	if ((ucs & ~0x0011u) == 0x300a || ucs == 0x303f)
		return 0;

	for (i = 0; i < sizeof(wide) / sizeof(wide[0]); i++) {
		if (ucs >= wide[i].first && ucs <= wide[i].last)
			return 1;
	}
	return 0;
}
20689d09728Smrg
/*
 * Write the command synopsis and a short example to stderr, then
 * terminate the process with EXIT_FAILURE.  Does not return.
 */
static void
usage(void)
{
	static const char help_text[] =
	    "Usage: bdftruncate [+w|-w] threshold <source.bdf >destination.bdf\n"
	    "\n"
	    "Example:\n"
	    "\n"
	    "  bdftruncate 0x3200 <6x13.bdf >6x13t.bdf\n"
	    "\n"
	    "will generate the file 6x13t.bdf in which all glyphs with codes\n"
	    ">= 0x3200 will only be stored unencoded (i.e., ENCODING -1).\n"
	    "Option -w removes East Asian Wide and East Asian FullWidth characters\n"
	    "(default if threshold <= 0x3200), and option +w keeps them.\n";

	/* The text holds no conversion specifications, so fputs suffices. */
	fputs(help_text, stderr);
	exit(EXIT_FAILURE);
}
223