Mdoc.pm revision 1.1 1 =head1 NAME
2
3 Mdoc - perl module to parse Mdoc macros
4
5 =head1 SYNOPSIS
6
7 use Mdoc qw(ns pp soff son stoggle mapwords);
8
9 See mdoc2man and mdoc2texi for code examples.
10
11 =head1 FUNCTIONS
12
13 =over 4
14
15 =item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] )
16
17 Define new macro. The CODE reference will be called by call_macro(). You can
18 have two distinct definitions for and inline macro and for a standalone macro
19 (i. e. 'Pa' and '.Pa').
20
21 The CODE reference is passed a list of arguments and is expected to return list
22 of strings and control characters (see C<CONSTANTS>).
23
24 By default the surrouding "" from arguments to macros are removed, use C<raw>
25 to disable this.
26
27 Normaly CODE reference is passed all arguments up to next nested macro. Set
28 C<greedy> to to pass everything up to the end of the line.
29
30 If the concat_until is present, the line is concated until the .Xx macro is
31 found. For example the following macro definition
32
33 def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' }
34 def_macro('.Cm', sub { mapwords {'($_)'} @_ } }
35
36 and the following input
37
38 .Oo
39 .Cm foo |
40 .Cm bar |
41 .Oc
42
43 results in [(foo) | (bar)]
44
45 =item get_macro( NAME )
46
47 Returns a hash reference like:
48
49 { run => CODE, raw => [1|0], greedy => [1|0] }
50
51 Where C<CODE> is the CODE reference used to define macro called C<NAME>
52
53 =item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE )
54
55 Parse a line from the C<INPUT> filehandle. If a macro was detected it returns a
56 list (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving
57 caller a chance to modify line before printing it. If C<PREPROCESS_CODE> is
58 defined it calls it prior to passing argument to a macro, giving caller a
59 chance to alter them. if EOF was reached undef is returned.
60
61 =item call_macro( MACRO, ARGS, ... )
62
63 Call macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is
64 called and for all the nested macros. Every called macro returns a list which
65 is appended to return value and returned when all nested macros are processed.
66 Use to_string() to produce a printable string from the list.
67
68 =item to_string ( LIST )
69
70 Processes C<LIST> returned from call_macro() and returns formatted string.
71
72 =item mapwords BLOCK ARRAY
73
74 This is like perl's map only it calls BLOCK only on elements which are not
75 punctuation or control characters.
76
77 =item space ( ['on'|'off] )
78
79 Turn spacing on or off. If called without argument it returns the current state.
80
81 =item gen_encloser ( START, END )
82
83 Helper function for generating macros that enclose their arguments.
84 gen_encloser(qw({ }));
85 returns
86 sub { '{', ns, @_, ns, pp('}')}
87
88 =item set_Bl_callback( CODE , DEFS )
89
90 This module implements the Bl/El macros for you. Using set_Bl_callback you can
91 provide a macro definition that should be executed on a .Bl call.
92
93 =item set_El_callback( CODE , DEFS )
94
95 This module implements the Bl/El macros for you. Using set_El_callback you can
96 provide a macro definition that should be executed on a .El call.
97
98 =item set_Re_callback( CODE )
99
100 The C<CODE> is called after a Rs/Re block is done. With a hash reference as a
101 parameter, describing the reference.
102
103 =back
104
105 =head1 CONSTANTS
106
107 =over 4
108
109 =item ns
110
111 Indicate 'no space' between to members of the list.
112
113 =item pp ( STRING )
114
115 The string is 'punctuation point'. It means that every punctuation
116 preceeding that element is put behind it.
117
118 =item soff
119
120 Turn spacing off.
121
122 =item son
123
124 Turn spacing on.
125
126 =item stoggle
127
128 Toogle spacing.
129
130 =item hs
131
132 Print space no matter spacing mode.
133
134 =back
135
136 =head1 TODO
137
138 * The concat_until only works with standalone macros. This means that
139 .Po blah Pc
140 will hang until .Pc in encountered.
141
142 * Provide default macros for Bd/Ed
143
144 * The reference implementation is uncomplete
145
146 =cut
147
148 package Mdoc;
149 use strict;
150 use warnings;
151 use List::Util qw(reduce);
152 use Text::ParseWords qw(quotewords);
153 use Carp;
154 use Exporter qw(import);
155 our @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl);
156
157 use constant {
158 ns => ['nospace'],
159 soff => ['spaceoff'],
160 son => ['spaceon'],
161 stoggle => ['spacetoggle'],
162 hs => ['hardspace'],
163 };
164
165 sub pp {
166 my $c = shift;
167 return ['pp', $c ];
168 }
169 sub gen_encloser {
170 my ($o, $c) = @_;
171 return sub { ($o, ns, @_, ns, pp($c)) };
172 }
173
174 sub mapwords(&@) {
175 my ($f, @l) = @_;
176 my @res;
177 for my $el (@l) {
178 local $_ = $el;
179 push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ?
180 $el : $f->();
181 }
182 return @res;
183 }
184
185 my %macros;
186
187 ###############################################################################
188
189 # Default macro definitions start
190
191 ###############################################################################
192
193 def_macro('Xo', sub { @_ }, concat_until => '.Xc');
194
195 def_macro('.Ns', sub {ns, @_});
196 def_macro('Ns', sub {ns, @_});
197
198 {
199 my %reference;
200 def_macro('.Rs', sub { () } );
201 def_macro('.%A', sub {
202 if ($reference{authors}) {
203 $reference{authors} .= " and @_"
204 }
205 else {
206 $reference{authors} = "@_";
207 }
208 return ();
209 });
210 def_macro('.%T', sub { $reference{title} = "@_"; () } );
211 def_macro('.%O', sub { $reference{optional} = "@_"; () } );
212
213 sub set_Re_callback {
214 my ($sub) = @_;
215 croak 'Not a CODE reference' if not ref $sub eq 'CODE';
216 def_macro('.Re', sub {
217 my @ret = $sub->(\%reference);
218 %reference = (); @ret
219 });
220 return;
221 }
222 }
223
224 def_macro('.Bl', sub { die '.Bl - no list callback set' });
225 def_macro('.It', sub { die ".It called outside of list context - maybe near line $." });
226 def_macro('.El', sub { die '.El requires .Bl first' });
227
228
229 {
230 my $elcb = sub { () };
231
232 sub set_El_callback {
233 my ($sub) = @_;
234 croak 'Not a CODE reference' if ref $sub ne 'CODE';
235 $elcb = $sub;
236 return;
237 }
238
239 sub set_Bl_callback {
240 my ($blcb, %defs) = @_;
241 croak 'Not a CODE reference' if ref $blcb ne 'CODE';
242 def_macro('.Bl', sub {
243
244 my $orig_it = get_macro('.It');
245 my $orig_el = get_macro('.El');
246 my $orig_bl = get_macro('.Bl');
247 my $orig_elcb = $elcb;
248
249 # Restore previous .It and .El on each .El
250 def_macro('.El', sub {
251 def_macro('.El', delete $orig_el->{run}, %$orig_el);
252 def_macro('.It', delete $orig_it->{run}, %$orig_it);
253 def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl);
254 my @ret = $elcb->(@_);
255 $elcb = $orig_elcb;
256 @ret
257 });
258 $blcb->(@_)
259 }, %defs);
260 return;
261 }
262 }
263
264 def_macro('.Sm', sub {
265 my ($arg) = @_;
266 if (defined $arg) {
267 space($arg);
268 } else {
269 space() eq 'off' ?
270 space('on') :
271 space('off');
272 }
273 ()
274 } );
275 def_macro('Sm', do { my $off; sub {
276 my ($arg) = @_;
277 if (defined $arg && $arg =~ /^(on|off)$/) {
278 shift;
279 if ($arg eq 'off') { soff, @_; }
280 elsif ($arg eq 'on') { son, @_; }
281 }
282 else {
283 stoggle, @_;
284 }
285 }} );
286
287 ###############################################################################
288
289 # Default macro definitions end
290
291 ###############################################################################
292
293 sub def_macro {
294 croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2;
295 my ($macro, $sub, %def) = @_;
296 croak 'Not a CODE reference' if ref $sub ne 'CODE';
297
298 $macros{ $macro } = {
299 run => $sub,
300 greedy => delete $def{greedy} || 0,
301 raw => delete $def{raw} || 0,
302 concat_until => delete $def{concat_until},
303 };
304 if ($macros{ $macro }{concat_until}) {
305 $macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } };
306 $macros{ $macro }{greedy} = 1;
307 }
308 return;
309 }
310
311 sub get_macro {
312 my ($macro) = @_;
313 croak "Macro <$macro> not defined" if not exists $macros{ $macro };
314 +{ %{ $macros{ $macro } } }
315 }
316
317 #TODO: document this
318 sub parse_opts {
319 my %args;
320 my $last;
321 for (@_) {
322 if ($_ =~ /^\\?-/) {
323 s/^\\?-//;
324 $args{$_} = 1;
325 $last = _unquote($_);
326 }
327 else {
328 $args{$last} = _unquote($_) if $last;
329 undef $last;
330 }
331 }
332 return %args;
333 }
334
335 sub _is_control {
336 my ($el, $expected) = @_;
337 if (defined $expected) {
338 ref $el eq 'ARRAY' and $el->[0] eq $expected;
339 }
340 else {
341 ref $el eq 'ARRAY';
342 }
343 }
344
345 {
346 my $sep = ' ';
347
348 sub to_string {
349 if (@_ > 0) {
350 # Handle punctunation
351 my ($in_brace, @punct) = '';
352 my @new = map {
353 if (/^([\[\(])$/) {
354 ($in_brace = $1) =~ tr/([/)]/;
355 $_, ns
356 }
357 elsif (/^([\)\]])$/ && $in_brace eq $1) {
358 $in_brace = '';
359 ns, $_
360 }
361 elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) {
362 push @punct, ns, $_;
363 ();
364 }
365 elsif (_is_control($_, 'pp')) {
366 $_->[1]
367 }
368 elsif (_is_control($_)) {
369 $_
370 }
371 else {
372 splice (@punct), $_;
373 }
374 } @_;
375 push @new, @punct;
376
377 # Produce string out of an array dealing with the special control characters
378 # space('off') must but one character delayed
379 my ($no_space, $space_off) = 1;
380 my $res = '';
381 while (defined(my $el = shift @new)) {
382 if (_is_control($el, 'hardspace')) { $no_space = 1; $res .= ' ' }
383 elsif (_is_control($el, 'nospace')) { $no_space = 1; }
384 elsif (_is_control($el, 'spaceoff')) { $space_off = 1; }
385 elsif (_is_control($el, 'spaceon')) { space('on'); }
386 elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ?
387 $space_off = 1 :
388 space('on') }
389 else {
390 if ($no_space) {
391 $no_space = 0;
392 $res .= "$el"
393 }
394 else {
395 $res .= "$sep$el"
396 }
397
398 if ($space_off) { space('off'); $space_off = 0; }
399 }
400 }
401 $res
402 }
403 else {
404 '';
405 }
406 }
407
408 sub space {
409 my ($arg) = @_;
410 if (defined $arg && $arg =~ /^(on|off)$/) {
411 $sep = ' ' if $arg eq 'on';
412 $sep = '' if $arg eq 'off';
413 return;
414 }
415 else {
416 return $sep eq '' ? 'off' : 'on';
417 }
418 }
419 }
420
421 sub _unquote {
422 my @args = @_;
423 $_ =~ s/^"([^"]+)"$/$1/g for @args;
424 wantarray ? @args : $args[0];
425 }
426
427 sub call_macro {
428 my ($macro, @args) = @_;
429 my @ret;
430
431 my @newargs;
432 my $i = 0;
433
434 @args = _unquote(@args) if (!$macros{ $macro }{raw});
435
436 # Call any callable macros in the argument list
437 for (@args) {
438 if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) {
439 push @ret, call_macro($_, @args[$i+1 .. $#args]);
440 last;
441 } else {
442 if ($macros{ $macro }{greedy}) {
443 push @ret, $_;
444 }
445 else {
446 push @newargs, $_;
447 }
448 }
449 $i++;
450 }
451
452 if ($macros{ $macro }{concat_until}) {
453 my ($n_macro, @n_args) = ('');
454 while (1) {
455 die "EOF was reached and no $macros{ $macro }{concat_until} found"
456 if not defined $n_macro;
457 ($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift });
458 if ($n_macro eq $macros{ $macro }{concat_until}) {
459 push @ret, call_macro($n_macro, @n_args);
460 last;
461 }
462 else {
463 $n_macro =~ s/^\.//;
464 push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro };
465 }
466 }
467 }
468
469 if ($macros{ $macro }{greedy}) {
470 #print "MACROG $macro (", (join ', ', @ret), ")\n";
471 return $macros{ $macro }{run}->(@ret);
472 }
473 else {
474 #print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n";
475 return $macros{ $macro }{run}->(@newargs), @ret;
476 }
477 }
478
479 {
480 my ($in_fh, $out_sub, $preprocess_sub);
481 sub parse_line {
482 $in_fh = $_[0] if defined $_[0] || !defined $in_fh;
483 $out_sub = $_[1] if defined $_[1] || !defined $out_sub;
484 $preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub;
485
486 croak 'out_sub not a CODE reference'
487 if not ref $out_sub eq 'CODE';
488 croak 'preprocess_sub not a CODE reference'
489 if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE';
490
491 while (my $line = <$in_fh>) {
492 chomp $line;
493 if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ ||
494 $line =~ /^\.\\"/)
495 {
496 $line =~ s/ +/ /g;
497 my ($macro, @args) = quotewords(' ', 1, $line);
498 @args = grep { defined $_ } @args;
499 $preprocess_sub->(@args) if defined $preprocess_sub;
500 if ($macro && exists $macros{ $macro }) {
501 return ($macro, @args);
502 } else {
503 $out_sub->($line);
504 }
505 }
506 else {
507 $out_sub->($line);
508 }
509 }
510 return;
511 }
512 }
513
514 1;
515 __END__
516