1#! /usr/bin/perl
2#
3# Copyright (c) 2009, 2010, Oracle and/or its affiliates.
4#
5# Permission is hereby granted, free of charge, to any person obtaining a
6# copy of this software and associated documentation files (the "Software"),
7# to deal in the Software without restriction, including without limitation
8# the rights to use, copy, modify, merge, publish, distribute, sublicense,
9# and/or sell copies of the Software, and to permit persons to whom the
10# Software is furnished to do so, subject to the following conditions:
11#
12# The above copyright notice and this permission notice (including the next
13# paragraph) shall be included in all copies or substantial portions of the
14# Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22# DEALINGS IN THE SOFTWARE.
23#
24
25#
26# Make a DocBook chart showing compose combinations for a locale
27#
28# See perldoc at end (or run with --help or --man options) for details
29# of command-line options.
30#
31
32# Compose file grammar is defined in modules/im/ximcp/imLcPrs.c
33
34use strict;
35use warnings;
36use Getopt::Long;
37use Pod::Usage;
38
# Running total of the error counts returned by make_compose_chart()
my $error_count = 0;

# Settings controlled by the command-line options (documented in the POD
# at the end of this file).
my ($charset, $locale_name);
my $output_filename = '-';
my ($man, $help, $make_index) = (0, 0, 0);

my $options_ok = GetOptions(
  'charset:s' => \$charset,
  'locale=s'  => \$locale_name,
  'output=s'  => \$output_filename,
  'index'     => \$make_index,
  'help|?'    => \$help,
  'man'       => \$man,
);

# Bad options: print usage and exit with status 2
pod2usage(2) unless $options_ok;
pod2usage(1) if $help;
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
57
# When no usable --charset value was given, try to derive one from the
# codeset part of the locale name, and fall back to "utf-8" otherwise.
unless (defined($charset) && ($charset ne "")) {
  if (defined($locale_name)) {
    # Drop everything up to and including the last '.' in the locale name
    (my $codeset = $locale_name) =~ s{^.*\.}{};

    if ($codeset =~ m{^(utf-8|gbk|gb18030)$}i) {
      $charset = $1;
    }
    elsif ($codeset =~ m{iso8859-(\d+)}i) {
      $charset = "iso-8859-$1";
    }
    elsif ($codeset =~ m{^microsoft-cp(125\d)$}) {
      $charset = "windows-$1";
    }
  }

  # Nothing recognizable in the locale name (or no locale given at all)
  unless (defined($charset) && ($charset ne "")) {
    $charset = "utf-8";
  }
}
74
# With --index, write a DocBook <article> that pulls in one chart per name
# given on the command line via XInclude, then exit without reading any
# compose files.
if ($make_index) {
  # An output name of '-' (the default) means standard output.  Three-argument
  # open() does not give '-' that meaning and would create a file literally
  # named "-", so duplicate STDOUT explicitly in that case.
  my $OUTPUT;
  if ($output_filename eq '-') {
    open($OUTPUT, '>&', \*STDOUT)
        or die "Could not open standard output: $!";
  } else {
    open($OUTPUT, '>', $output_filename)
        or die "Could not create $output_filename: $!";
  }

  # One XInclude element per chart, each with a fallback section that is
  # used when the referenced chart file is missing.
  my @chart_includes = map {
    join("\n",
         qq(  <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"  href="$_.xml">),
         qq(    <xi:fallback><section><title>$_</title><para></para></section></xi:fallback>),
         qq(  </xi:include>))
  } @ARGV;

  # Print Docbook output
  print $OUTPUT
      join ("\n",
            qq(<?xml version="1.0" encoding="$charset" ?>),
            q(<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"),
            q( "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd">),
            q(<article id="libX11-keys">),
            q(  <articleinfo>),
            q(    <title>Xlib Compose Key Charts</title>),
            q(  </articleinfo>),
            @chart_includes,
            q(</article>),
            "\n"
      );

  # Buffered write errors only show up at close time
  close $OUTPUT or die "Couldn't write $output_filename: $!";

  exit(0);
}
101
# Chart each compose file named on the command line, adding up the error
# counts reported for each one; the total becomes the exit status.
for my $compose_file (@ARGV) {
  my $file_errors = make_compose_chart($compose_file);
  $error_count += $file_errors;
}

exit($error_count);
107
# make_compose_chart($filename)
#
# Read the compose table in $filename and write a DocBook <section>
# charting its key sequences to $output_filename ('-' means standard
# output).  Uses the file-level settings $charset and $locale_name for the
# XML encoding and for the titles.
#
# Files named *.pre are handled specially: C comments are stripped and
# XCOMM comment lines are turned into '#' comments before parsing.
#
# Returns the number of errors found.  Unrecognized lines are reported on
# STDERR but are not counted, so the result is currently always 0.
# Dies if a file cannot be opened or the output cannot be written.
sub make_compose_chart {
  my ($filename) = @_;
  my $errors = 0;

  my @compose_table = ();       # 'comment' and 'keyseq' entries, in file order
  my @included_files = ();      # descriptions of tables named by "include"

  my $line = 0;                 # current input line number, for diagnostics
  my $pre_file = ($filename =~ m{\.pre$}) ? 1 : 0;
  my $in_c_comment = 0;         # inside a multi-line C comment (.pre only)
  my $in_comment = 0;           # the previous line was a '#' comment
  my $keyseq_count = 0;

  # Tables are split at this many rows to avoid overflowing
  # xmlto/xsltproc limits on the largest tables
  my $max_rows_per_table = 750;

  open my $COMPOSE, '<', $filename or die "Could not open $filename: $!";

 COMPOSE_LINE:
  while (my $cl = <$COMPOSE>) {
    $line++;
    chomp($cl);

    # Special handling for changes cpp makes to .pre files
    if ($pre_file) {
      if ($in_c_comment) {      # Look for end of multi-line C comment
        if ($cl =~ m{\*/(.*)$}) {
          $cl = $1;
          $in_c_comment = 0;
        } else {
          next COMPOSE_LINE;
        }
      }
      # Remove C comments that open and close on this line.  Non-greedy so
      # that text between two separate comments is kept.
      $cl =~ s{/\*.*?\*/}{}g;
      if ($cl =~ m{^(.*)/\*}) { # Start of a multi-line C comment
        $cl = $1;
        $in_c_comment = 1;
      }
      $cl =~ s{^\s*XCOMM}{#};   # Translate pre-processing comments
    }

    if ($cl =~ m{^\s*#\s*(.*)$}) {      # Comment only lines
      # Combine comment blocks: consecutive comment lines are merged into
      # the single table entry created for the first of them
      my $comment = $1;

      if ($in_comment) {
        my $prev_comment = pop @compose_table;
        $comment = join(' ', $prev_comment->{-comment}, $comment);
      } else {
        $in_comment = 1;
      }

      push @compose_table, { -type => 'comment', -comment => $comment };
      next COMPOSE_LINE;
    }

    $in_comment = 0;

    # The keyword checks must come before the generic "sequence : action"
    # pattern, which would otherwise also match lines such as
    # "STATE_TYPE: ..." or an include path containing a colon.
    if ($cl =~ m{^\s*$}) {              # Skip blank lines
      next COMPOSE_LINE;
    }
    elsif ($cl =~ m{^(STATE\s+|END_STATE)}) {
      # Sun extension to compose file syntax
      next COMPOSE_LINE;
    }
    elsif ($cl =~ m{^(STATE_TYPE:|\@StartDeadKeyMap|\@EndDeadKeyMap)}) {
      # ignore
      next COMPOSE_LINE;
    }
    elsif ($cl =~ m{^include "(.*)"}) {
      my $incpath = $1;
      $incpath =~ s{^X11_LOCALEDATADIR/(.*)/Compose}{the $1 compose table};

      push @included_files, $incpath;
      next COMPOSE_LINE;
    }
    elsif ($cl =~ m{^([^:]+)\s*:\s*(.+)$}) {
      my ($seq, $action) = ($1, $2);
      $seq =~ s{\s+$}{};

      # Key names are what is left after splitting on whitespace and on
      # the angle brackets around each key
      my @keys = grep { $_ !~ m/^\s*$/ } split /[\s\<\>]+/, $seq;

      push @compose_table, {
        -type => 'keyseq',
        -keys => [ @keys ],
        -action => $action
      };
      $keyseq_count++;
      next COMPOSE_LINE;
    }
    else {
      # Reported only; deliberately not added to $errors, so the chart is
      # still written for the lines that were understood
      print STDERR ('Unrecognized pattern in ', $filename,
                    ' on line #', $line, ":\n  ", $cl, "\n");
    }
  }
  close $COMPOSE;

  if ($errors > 0) {
    return $errors;
  }

  # Print Docbook output.  An output name of '-' (the default) means
  # standard output; three-argument open() does not give '-' that meaning
  # and would create a file literally named "-".
  my $OUTPUT;
  if ($output_filename eq '-') {
    open($OUTPUT, '>&', \*STDOUT)
        or die "Could not open standard output: $!";
  } else {
    open($OUTPUT, '>', $output_filename)
        or die "Could not create $output_filename: $!";
  }

  print $OUTPUT
      join ("\n",
            qq(<?xml version="1.0" encoding="$charset" ?>),
            q(<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN"),
            q( "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd">),
            qq(<section id="$locale_name">),
            qq(<title>Xlib Compose Keys for $locale_name</title>),
            q(<para>Applications using Xlib input handling should recognize),
            q( these compose key sequences in locales using the),
            qq( $locale_name compose table.</para>),
            "\n"
      );

  if (@included_files) {
    print $OUTPUT
        q(<para>This compose table includes the non-conflicting),
        q( entries from: ),
        join(',', @included_files),
        q(.  Those entries are not shown here - see those charts for the),
        q( included key sequences.</para>),
        "\n";
  }

  # Comments printed as paragraphs ahead of the table: every comment when
  # the file defines no sequences, otherwise only a leading comment block
  my @pretable_comments = ();

  if ($keyseq_count == 0) {
    @pretable_comments = @compose_table;
  } elsif ($compose_table[0]->{-type} eq 'comment') {
    push @pretable_comments, shift @compose_table;
  }

  foreach my $comment_ref (@pretable_comments) {
    print $OUTPUT
        qq(<para>), xml_escape($comment_ref->{-comment}), qq(</para>\n);
  }

  if ($keyseq_count > 0) {
    start_table($OUTPUT);
    my $row_count = 0;

    foreach my $cr (@compose_table) {

      # Start a new table once the current one is full
      if ($row_count >= $max_rows_per_table) {
        end_table($OUTPUT);
        start_table($OUTPUT);
        $row_count = 0;
      }
      $row_count++;

      if ($cr->{-type} eq 'comment') {
        # Comments inside the table span both columns
        print $OUTPUT
            qq(<row><entry namest='seq' nameend='action'>),
            xml_escape($cr->{-comment}), qq(</entry></row>\n);
      } elsif ($cr->{-type} eq 'keyseq') {
        my $action = join(" ", xml_escape($cr->{-action}));
        # Show a leading octal escape such as "\251" as the character it
        # stands for, unless it is a control character (below 32)
        if ($action =~ m{^\s*"\\([0-7]+)"}) {
          my $char = oct($1);
          if ($char >= 32) {
            $action =~ s{^\s*"\\[0-7]+"}{"&#$char;"};
          }
        }
        # Drop any whitespace in front of the quoted result string
        $action =~ s{^\s*"(.+)"}{"$1"};

        print $OUTPUT
            qq(<row><entry>),
            qq(<keycombo action='seq'>),
            (map { qq(<keysym>$_</keysym>) } xml_escape(@{$cr->{-keys}})),
            qq(</keycombo>),
            qq(</entry><entry>),
            $action,
            qq(</entry></row>\n);
      }
    }

    end_table($OUTPUT);
  } else {
    print $OUTPUT
        qq(<para><emphasis>),
        qq(This compose table defines no sequences of its own.),
        qq(</emphasis></para>\n);
  }
  print $OUTPUT "</section>\n";

  # Buffered write errors only show up at close time
  close $OUTPUT or die "Couldn't write $output_filename: $!";

  return $errors;
}
298
# xml_escape(@strings)
#
# Return copies of the given strings with the characters '&', '<' and '>'
# replaced by the entity references &amp;, &lt; and &gt;.
#
# The arguments themselves are left untouched: the elements of @_ are
# aliases for the caller's variables, so escaping them in place would
# change the caller's data (escaping it again on a second call) and would
# die when a string constant is passed.
#
# Returns the escaped strings as a list, in argument order.
sub xml_escape {
  my @output;

  foreach my $text (@_) {
      my $escaped = $text;              # work on a copy, not the alias
      $escaped =~ s{\&}{&amp;}g;        # must come first: the entities
                                        # added below contain '&'
      $escaped =~ s{\<}{&lt;}g;
      $escaped =~ s{\>}{&gt;}g;
      push @output, $escaped;
  }
  return @output;
}
310
# start_table($OUTPUT)
#
# Print the opening markup of a two-column DocBook table (key sequence and
# action) to the given filehandle, up to and including the <tbody> tag.
# The table title uses the file-level $locale_name.
sub start_table {
  my ($fh) = @_;

  my @markup = (
    qq(<table><title>Compose Key Sequences for $locale_name</title>),
    q(<tgroup cols='2'>),
    q( <colspec colname='seq' /><colspec colname='action' />),
    q( <thead><row>),
    q(  <entry>Key Sequence</entry><entry>Action</entry>),
    q( </row></thead>),
    q( <tbody>),
  );

  # Every line, including the last one, ends with a newline
  print {$fh} join('', map { "$_\n" } @markup);
}
325
# end_table($OUTPUT)
#
# Print the closing </tbody>, </tgroup> and </table> tags, one per line,
# to the given filehandle.
sub end_table {
  my ($fh) = @_;

  print {$fh} join("\n", '</tbody>', '</tgroup>', '</table>', '');
}
331
332__END__
333
334=head1 NAME
335
336compose-chart - Make DocBook/XML charts of compose table entries
337
338=head1 SYNOPSIS
339
340compose-chart [options] [file ...]
341
342 Options:
    --charset[=<cset>]	character set to name in the XML declaration
344    --locale=<locale>	name of locale to display in chart
345    --output=<file>	filename to output chart to
346    --index		make index of charts instead of individual chart
347    --help		brief help message
348    --man		full documentation
349
350=head1 OPTIONS
351
352=over 8
353
354=item B<--charset>[=I<cset>]
355
Specify the character set to name in the encoding attribute of the XML
declaration at the top of the output.  If not specified, the script attempts
to guess it from the locale name, and otherwise defaults to "utf-8".
359
360=item B<--locale>=I<locale>
361
362Specify the locale name to use in the chart titles and introductory text.
363
364=item B<--output>=I<file>
365
366Specify the output file to write the DocBook output to.
367
368=item B<--index>
369
370Generate an index of the listed locale charts instead of a chart for a
371specific locale.
372
373=item B<--help>
374
375Print a brief help message and exit.
376
377=item B<--man>
378
379Print the manual page and exit.
380
381=back
382
383=head1 DESCRIPTION
384
385This program will read the given compose table file(s) and generate
386DocBook/XML charts listing the available characters for end-user reference.
387
388=cut
389