Home | History | Annotate | Line # | Download | only in 0-old
Mdoc.pm revision 1.1
      1 =head1 NAME
      2 
      3 Mdoc - perl module to parse Mdoc macros
      4 
      5 =head1 SYNOPSIS
      6 
      7     use Mdoc qw(ns pp soff son stoggle mapwords);
      8 
      9 See mdoc2man and mdoc2texi for code examples.
     10 
     11 =head1 FUNCTIONS
     12 
     13 =over 4
     14 
     15 =item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] )
     16 
     17 Define new macro. The CODE reference will be called by call_macro(). You can
     18 have two distinct definitions for and inline macro and for a standalone macro
     19 (i. e. 'Pa' and '.Pa').
     20 
     21 The CODE reference is passed a list of arguments and is expected to return list
     22 of strings and control characters (see C<CONSTANTS>).
     23 
     24 By default the surrouding "" from arguments to macros are removed, use C<raw>
     25 to disable this.
     26 
     27 Normaly CODE reference is passed all arguments up to next nested macro. Set
     28 C<greedy> to to pass everything up to the end of the line.
     29 
     30 If the concat_until is present, the line is concated until the .Xx macro is
     31 found. For example the following macro definition
     32 
     33     def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' }
     34     def_macro('.Cm', sub { mapwords {'($_)'} @_ } }
     35 
     36 and the following input
     37 
     38     .Oo
     39     .Cm foo |
     40     .Cm bar |
     41     .Oc
     42 
     43 results in [(foo) | (bar)]
     44 
     45 =item get_macro( NAME )
     46 
     47 Returns a hash reference like:
     48 
     49     { run => CODE, raw => [1|0], greedy => [1|0] }
     50 
     51 Where C<CODE> is the CODE reference used to define macro called C<NAME>
     52 
     53 =item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE )
     54 
     55 Parse a line from the C<INPUT> filehandle. If a macro was detected it returns a
     56 list (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving
     57 caller a chance to modify line before printing it. If C<PREPROCESS_CODE> is
     58 defined it calls it prior to passing argument to a macro, giving caller a
     59 chance to alter them.  if EOF was reached undef is returned.
     60 
     61 =item call_macro( MACRO, ARGS, ... )
     62 
     63 Call macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is
     64 called and for all the nested macros. Every called macro returns a list which
     65 is appended to return value and returned when all nested macros are processed.
     66 Use to_string() to produce a printable string from the list.
     67 
     68 =item to_string ( LIST )
     69 
     70 Processes C<LIST> returned from call_macro() and returns formatted string.
     71 
     72 =item mapwords BLOCK ARRAY
     73 
     74 This is like perl's map only it calls BLOCK only on elements which are not
     75 punctuation or control characters.
     76 
     77 =item space ( ['on'|'off] )
     78 
     79 Turn spacing on or off. If called without argument it returns the current state.
     80 
     81 =item gen_encloser ( START, END )
     82 
     83 Helper function for generating macros that enclose their arguments.
     84     gen_encloser(qw({ }));
     85 returns
     86     sub { '{', ns, @_, ns, pp('}')}
     87 
     88 =item set_Bl_callback( CODE , DEFS )
     89 
     90 This module implements the Bl/El macros for you. Using set_Bl_callback you can
     91 provide a macro definition that should be executed on a .Bl call.
     92 
     93 =item set_El_callback( CODE , DEFS )
     94 
     95 This module implements the Bl/El macros for you. Using set_El_callback you can
     96 provide a macro definition that should be executed on a .El call.
     97 
     98 =item set_Re_callback( CODE )
     99 
    100 The C<CODE> is called after a Rs/Re block is done. With a hash reference as a
    101 parameter, describing the reference.
    102 
    103 =back 
    104 
    105 =head1 CONSTANTS
    106 
    107 =over 4
    108 
    109 =item ns
    110 
    111 Indicate 'no space' between to members of the list.
    112 
    113 =item pp ( STRING )
    114 
    115 The string is 'punctuation point'. It means that every punctuation
    116 preceeding that element is put behind it. 
    117 
    118 =item soff
    119 
    120 Turn spacing off.
    121 
    122 =item son
    123 
    124 Turn spacing on.
    125 
    126 =item stoggle
    127 
    128 Toogle spacing.
    129 
    130 =item hs
    131 
    132 Print space no matter spacing mode.
    133 
    134 =back
    135 
    136 =head1 TODO
    137 
    138 * The concat_until only works with standalone macros. This means that
    139     .Po blah Pc
    140 will hang until .Pc in encountered.
    141 
    142 * Provide default macros for Bd/Ed
    143 
    144 * The reference implementation is uncomplete
    145 
    146 =cut
    147 
    148 package Mdoc;
    149 use strict;
    150 use warnings;
    151 use List::Util qw(reduce);
    152 use Text::ParseWords qw(quotewords);
    153 use Carp;
    154 use Exporter qw(import);
    155 our @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl);
    156 
    157 use constant {
    158     ns      => ['nospace'],
    159     soff    => ['spaceoff'],
    160     son     => ['spaceon'],
    161     stoggle => ['spacetoggle'],
    162     hs      => ['hardspace'],
    163 };
    164 
    165 sub pp { 
    166     my $c = shift;
    167     return ['pp', $c ];
    168 }
    169 sub gen_encloser {
    170     my ($o, $c) = @_;
    171     return sub { ($o, ns, @_, ns, pp($c)) };
    172 }
    173 
    174 sub mapwords(&@) {
    175     my ($f, @l) = @_;
    176     my @res;
    177     for my $el (@l) {
    178         local $_ = $el;
    179         push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ? 
    180                     $el : $f->();
    181     }
    182     return @res;
    183 }
    184 
    185 my %macros;
    186 
    187 ###############################################################################
    188 
    189 # Default macro definitions start
    190 
    191 ###############################################################################
    192 
    193 def_macro('Xo',  sub { @_ }, concat_until => '.Xc');
    194 
    195 def_macro('.Ns', sub {ns, @_});
    196 def_macro('Ns',  sub {ns, @_});
    197 
    198 {
    199     my %reference;
    200     def_macro('.Rs', sub { () } );
    201     def_macro('.%A', sub {
    202         if ($reference{authors}) {
    203             $reference{authors} .= " and @_"
    204         }
    205         else {
    206             $reference{authors} = "@_";
    207         }
    208         return ();
    209     });
    210     def_macro('.%T', sub { $reference{title} = "@_"; () } );
    211     def_macro('.%O', sub { $reference{optional} = "@_"; () } );
    212 
    213     sub set_Re_callback {
    214         my ($sub) = @_;
    215         croak 'Not a CODE reference' if not ref $sub eq 'CODE';
    216         def_macro('.Re', sub { 
    217             my @ret = $sub->(\%reference);
    218             %reference = (); @ret
    219         });
    220         return;
    221     }
    222 }
    223 
    224 def_macro('.Bl', sub { die '.Bl - no list callback set' });
    225 def_macro('.It', sub { die ".It called outside of list context - maybe near line $." });
    226 def_macro('.El', sub { die '.El requires .Bl first' });
    227 
    228 
    229 { 
    230     my $elcb = sub { () };
    231 
    232     sub set_El_callback {
    233         my ($sub) = @_;
    234         croak 'Not a CODE reference' if ref $sub ne 'CODE';
    235         $elcb = $sub;
    236         return;
    237     }
    238 
    239     sub set_Bl_callback {
    240         my ($blcb, %defs) = @_;
    241         croak 'Not a CODE reference' if ref $blcb ne 'CODE';
    242         def_macro('.Bl', sub { 
    243 
    244             my $orig_it   = get_macro('.It');
    245             my $orig_el   = get_macro('.El');
    246             my $orig_bl   = get_macro('.Bl');
    247             my $orig_elcb = $elcb;
    248 
    249             # Restore previous .It and .El on each .El
    250             def_macro('.El', sub {
    251                     def_macro('.El', delete $orig_el->{run}, %$orig_el);
    252                     def_macro('.It', delete $orig_it->{run}, %$orig_it);
    253                     def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl);
    254                     my @ret = $elcb->(@_);
    255                     $elcb = $orig_elcb;
    256                     @ret
    257                 });
    258             $blcb->(@_) 
    259         }, %defs);
    260         return;
    261     }
    262 }
    263 
    264 def_macro('.Sm', sub { 
    265     my ($arg) = @_;
    266     if (defined $arg) {
    267         space($arg);
    268     } else {
    269         space() eq 'off' ? 
    270             space('on') : 
    271             space('off'); 
    272     }
    273     () 
    274 } );
    275 def_macro('Sm', do { my $off; sub { 
    276     my ($arg) = @_;
    277     if (defined $arg && $arg =~ /^(on|off)$/) {
    278         shift;
    279         if    ($arg eq 'off') { soff, @_; }
    280         elsif ($arg eq 'on')  { son, @_; }
    281     }
    282     else {
    283         stoggle, @_;
    284     }
    285 }} );
    286 
    287 ###############################################################################
    288 
    289 # Default macro definitions end
    290 
    291 ###############################################################################
    292 
    293 sub def_macro {
    294     croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2;
    295     my ($macro, $sub, %def) = @_;
    296     croak 'Not a CODE reference' if ref $sub ne 'CODE';
    297 
    298     $macros{ $macro } = { 
    299         run          => $sub,
    300         greedy       => delete $def{greedy} || 0,
    301         raw          => delete $def{raw}    || 0,
    302         concat_until => delete $def{concat_until},
    303     };
    304     if ($macros{ $macro }{concat_until}) {
    305         $macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } };
    306         $macros{ $macro }{greedy}                  = 1;
    307     }
    308     return;
    309 }
    310 
    311 sub get_macro {
    312     my ($macro) = @_;
    313     croak "Macro <$macro> not defined" if not exists $macros{ $macro };
    314     +{ %{ $macros{ $macro } } }
    315 }
    316 
    317 #TODO: document this
    318 sub parse_opts {
    319     my %args;
    320     my $last;
    321     for (@_) {
    322         if ($_ =~ /^\\?-/) {
    323             s/^\\?-//;
    324             $args{$_} = 1;
    325             $last = _unquote($_);
    326         }
    327         else {
    328             $args{$last} = _unquote($_) if $last;
    329             undef $last;
    330         }
    331     }
    332     return %args;
    333 }
    334 
    335 sub _is_control {
    336     my ($el, $expected) = @_;
    337     if (defined $expected) {
    338         ref $el eq 'ARRAY' and $el->[0] eq $expected;
    339     }
    340     else {
    341         ref $el eq 'ARRAY';
    342     }
    343 }
    344 
    345 {
    346     my $sep = ' ';
    347 
    348     sub to_string {
    349         if (@_ > 0) { 
    350             # Handle punctunation
    351             my ($in_brace, @punct) = '';
    352             my @new = map {
    353                 if (/^([\[\(])$/) {
    354                     ($in_brace = $1) =~ tr/([/)]/;
    355                     $_, ns
    356                 }
    357                 elsif (/^([\)\]])$/ && $in_brace eq $1) {
    358                     $in_brace = '';
    359                     ns, $_
    360                 }
    361                 elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) {
    362                     push @punct, ns, $_;
    363                     ();
    364                 }
    365                 elsif (_is_control($_, 'pp')) {
    366                     $_->[1]
    367                 }
    368                 elsif (_is_control($_)) {
    369                     $_
    370                 }
    371                 else {
    372                     splice (@punct), $_;
    373                 }
    374             } @_;
    375             push @new, @punct;
    376 
    377             # Produce string out of an array dealing with the special control characters
    378             # space('off') must but one character delayed
    379             my ($no_space, $space_off) = 1;
    380             my $res = '';
    381             while (defined(my $el = shift @new)) {
    382                 if    (_is_control($el, 'hardspace'))   { $no_space = 1; $res .= ' ' }
    383                 elsif (_is_control($el, 'nospace'))     { $no_space = 1;             }
    384                 elsif (_is_control($el, 'spaceoff'))    { $space_off = 1;            }
    385                 elsif (_is_control($el, 'spaceon'))     { space('on');               }
    386                 elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ? 
    387                                                             $space_off = 1 : 
    388                                                             space('on')              }
    389                 else {
    390                     if ($no_space) {
    391                         $no_space = 0;
    392                         $res .= "$el"
    393                     }
    394                     else {
    395                         $res .= "$sep$el"
    396                     }
    397 
    398                     if ($space_off)    { space('off'); $space_off = 0; }
    399                 }
    400             }
    401             $res
    402         }
    403         else { 
    404             '';
    405         }
    406     }
    407 
    408     sub space {
    409         my ($arg) = @_;
    410         if (defined $arg && $arg =~ /^(on|off)$/) {
    411             $sep = ' ' if $arg eq 'on';
    412             $sep = ''  if $arg eq 'off';
    413             return;
    414         }
    415         else {
    416             return $sep eq '' ? 'off' : 'on';
    417         }
    418     }
    419 }
    420 
    421 sub _unquote {
    422     my @args = @_;
    423     $_ =~ s/^"([^"]+)"$/$1/g for @args;
    424     wantarray ? @args : $args[0];
    425 }
    426 
    427 sub call_macro {
    428     my ($macro, @args) = @_;
    429     my @ret; 
    430 
    431     my @newargs;
    432     my $i = 0;
    433 
    434     @args = _unquote(@args) if (!$macros{ $macro }{raw});
    435 
    436     # Call any callable macros in the argument list
    437     for (@args) {
    438         if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) {
    439             push @ret, call_macro($_, @args[$i+1 .. $#args]);
    440             last;
    441         } else {
    442             if ($macros{ $macro }{greedy}) {
    443                 push @ret, $_;
    444             }
    445             else {
    446                 push @newargs, $_;
    447             }
    448         }
    449         $i++;
    450     }
    451 
    452     if ($macros{ $macro }{concat_until}) {
    453         my ($n_macro, @n_args) = ('');
    454         while (1) {
    455             die "EOF was reached and no $macros{ $macro }{concat_until} found" 
    456                 if not defined $n_macro;
    457             ($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift });
    458             if ($n_macro eq $macros{ $macro }{concat_until}) {
    459                 push @ret, call_macro($n_macro, @n_args);
    460                 last;
    461             }
    462             else {
    463                 $n_macro =~ s/^\.//;
    464                 push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro };
    465             }
    466         }
    467     }
    468 
    469     if ($macros{ $macro }{greedy}) {
    470         #print "MACROG $macro (", (join ', ', @ret), ")\n";
    471         return $macros{ $macro }{run}->(@ret);
    472     }
    473     else {
    474         #print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n";
    475         return $macros{ $macro }{run}->(@newargs), @ret;
    476     }
    477 }
    478 
    479 {
    480     my ($in_fh, $out_sub, $preprocess_sub);
    481     sub parse_line {
    482         $in_fh          = $_[0] if defined $_[0] || !defined $in_fh;
    483         $out_sub        = $_[1] if defined $_[1] || !defined $out_sub;
    484         $preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub;
    485 
    486         croak 'out_sub not a CODE reference' 
    487             if not ref $out_sub eq 'CODE';
    488         croak 'preprocess_sub not a CODE reference' 
    489             if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE';
    490 
    491         while (my $line = <$in_fh>) {
    492             chomp $line;
    493             if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ || 
    494                 $line =~ /^\.\\"/) 
    495             {
    496                 $line =~ s/ +/ /g;
    497                 my ($macro, @args) = quotewords(' ', 1, $line);
    498                 @args = grep { defined $_ } @args;
    499                 $preprocess_sub->(@args) if defined $preprocess_sub;
    500                 if ($macro && exists $macros{ $macro }) {
    501                     return ($macro, @args);
    502                 } else {
    503                     $out_sub->($line);
    504                 }
    505             }
    506             else {
    507                 $out_sub->($line);
    508             }
    509         }
    510         return;
    511     }
    512 }
    513 
    514 1;
    515 __END__
    516