Home | History | Annotate | Line # | Download | only in OpenSSL
      1 #! /usr/bin/env perl
      2 # Copyright 2018-2024 The OpenSSL Project Authors. All Rights Reserved.
      3 #
      4 # Licensed under the Apache License 2.0 (the "License").  You may not use
      5 # this file except in compliance with the License.  You can obtain a copy
      6 # in the file LICENSE in the source distribution or at
      7 # https://www.openssl.org/source/license.html
      8 
      9 package OpenSSL::ParseC;
     10 
     11 use strict;
     12 use warnings;
     13 
     14 use Exporter;
     15 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
     16 $VERSION = "0.9";
     17 @ISA = qw(Exporter);
     18 @EXPORT = qw(parse);
     19 
     20 # Global handler data
     21 my @preprocessor_conds;         # A list of simple preprocessor conditions,
     22                                 # each item being a list of macros defined
     23                                 # or not defined.
     24 
     25 # Handler helpers
     26 sub all_conds {
     27     return map { ( @$_ ) } @preprocessor_conds;
     28 }
     29 
     30 # A list of handlers that will look at a "complete" string and try to
     31 # figure out what to make of it.
     32 # Each handler is a hash with the following keys:
     33 #
     34 # regexp                a regexp to compare the "complete" string with.
     35 # checker               a function that does a more complex comparison.
     36 #                       Use this instead of regexp if that isn't enough.
     37 # massager              massages the "complete" string into an array with
     38 #                       the following elements:
     39 #
     40 #                       [0]     String that needs further processing (this
     41 #                               applies to typedefs of structs), or empty.
     42 #                       [1]     The name of what was found.
     43 #                       [2]     A character that denotes what type of thing
     44 #                               this is: 'F' for function, 'S' for struct,
     45 #                               'T' for typedef, 'M' for macro, 'V' for
     46 #                               variable.
     47 #                       [3]     Return type (only for type 'F' and 'V')
     48 #                       [4]     Value (for type 'M') or signature (for type 'F',
     49 #                               'V', 'T' or 'S')
     50 #                       [5...]  The list of preprocessor conditions this is
     51 #                               found in, as in checks for macro definitions
     52 #                               (stored as the macro's name) or the absence
     53 #                               of definition (stored as the macro's name
     54 #                               prefixed with a '!'
     55 #
     56 #                       If the massager returns an empty list, it means the
     57 #                       "complete" string has side effects but should otherwise
     58 #                       be ignored.
     59 #                       If the massager is undefined, the "complete" string
     60 #                       should be ignored.
     61 my @opensslcpphandlers = (
     62     ##################################################################
     63     # OpenSSL CPP specials
     64     #
     65     # These are used to convert certain pre-precessor expressions into
     66     # others that @cpphandlers have a better chance to understand.
     67 
     68     # This changes any OPENSSL_NO_DEPRECATED_x_y[_z] check to a check of
     69     # OPENSSL_NO_DEPRECATEDIN_x_y[_z].  That's due to <openssl/macros.h>
     70     # creating OPENSSL_NO_DEPRECATED_x_y[_z], but the ordinals files using
     71     # DEPRECATEDIN_x_y[_z].
     72     { regexp   => qr/#if(def|ndef) OPENSSL_NO_DEPRECATED_(\d+_\d+(?:_\d+)?)$/,
     73       massager => sub {
     74           return (<<"EOF");
     75 #if$1 OPENSSL_NO_DEPRECATEDIN_$2
     76 EOF
     77       }
     78     }
     79 );
     80 my @cpphandlers = (
     81     ##################################################################
     82     # CPP stuff
     83 
     84     { regexp   => qr/#ifdef ?(.*)/,
     85       massager => sub {
     86           my %opts;
     87           if (ref($_[$#_]) eq "HASH") {
     88               %opts = %{$_[$#_]};
     89               pop @_;
     90           }
     91           push @preprocessor_conds, [ $1 ];
     92           print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
     93               if $opts{debug};
     94           return ();
     95       },
     96     },
     97     { regexp   => qr/#ifndef ?(.*)/,
     98       massager => sub {
     99           my %opts;
    100           if (ref($_[$#_]) eq "HASH") {
    101               %opts = %{$_[$#_]};
    102               pop @_;
    103           }
    104           push @preprocessor_conds, [ '!'.$1 ];
    105           print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
    106               if $opts{debug};
    107           return ();
    108       },
    109     },
    110     { regexp   => qr/#if (0|1)/,
    111       massager => sub {
    112           my %opts;
    113           if (ref($_[$#_]) eq "HASH") {
    114               %opts = %{$_[$#_]};
    115               pop @_;
    116           }
    117           if ($1 eq "1") {
    118               push @preprocessor_conds, [ "TRUE" ];
    119           } else {
    120               push @preprocessor_conds, [ "!TRUE" ];
    121           }
    122           print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
    123               if $opts{debug};
    124           return ();
    125       },
    126     },
    127     { regexp   => qr/#if ?(.*)/,
    128       massager => sub {
    129           my %opts;
    130           if (ref($_[$#_]) eq "HASH") {
    131               %opts = %{$_[$#_]};
    132               pop @_;
    133           }
    134           my @results = ();
    135           my $conds = $1;
    136           if ($conds =~ m|^defined<<<\(([^\)]*)\)>>>(.*)$|) {
    137               push @results, $1; # Handle the simple case
    138               my $rest = $2;
    139               my $re = qr/^(?:\|\|defined<<<\([^\)]*\)>>>)*$/;
    140               print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
    141                   if $opts{debug};
    142               if ($rest =~ m/$re/) {
    143                   my @rest = split /\|\|/, $rest;
    144                   shift @rest;
    145                   foreach (@rest) {
    146                       m|^defined<<<\(([^\)]*)\)>>>$|;
    147                       die "Something wrong...$opts{PLACE}" if $1 eq "";
    148                       push @results, $1;
    149                   }
    150               } else {
    151                   $conds =~ s/<<<|>>>//g;
    152                   warn "Warning: complicated #if expression(1): $conds$opts{PLACE}"
    153                       if $opts{warnings};
    154               }
    155           } elsif ($conds =~ m|^!defined<<<\(([^\)]*)\)>>>(.*)$|) {
    156               push @results, '!'.$1; # Handle the simple case
    157               my $rest = $2;
    158               my $re = qr/^(?:\&\&!defined<<<\([^\)]*\)>>>)*$/;
    159               print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n"
    160                   if $opts{debug};
    161               if ($rest =~ m/$re/) {
    162                   my @rest = split /\&\&/, $rest;
    163                   shift @rest;
    164                   foreach (@rest) {
    165                       m|^!defined<<<\(([^\)]*)\)>>>$|;
    166                       die "Something wrong...$opts{PLACE}" if $1 eq "";
    167                       push @results, '!'.$1;
    168                   }
    169               } else {
    170                   $conds =~ s/<<<|>>>//g;
    171                   warn "Warning: complicated #if expression(2): $conds$opts{PLACE}"
    172                       if $opts{warnings};
    173               }
    174           } else {
    175               $conds =~ s/<<<|>>>//g;
    176               warn "Warning: complicated #if expression(3): $conds$opts{PLACE}"
    177                   if $opts{warnings};
    178           }
    179           print STDERR "DEBUG[",$opts{debug_type},"]: Added preprocessor conds: '", join("', '", @results), "'\n"
    180               if $opts{debug};
    181           push @preprocessor_conds, [ @results ];
    182           print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
    183               if $opts{debug};
    184           return ();
    185       },
    186     },
    187     { regexp   => qr/#elif (.*)/,
    188       massager => sub {
    189           my %opts;
    190           if (ref($_[$#_]) eq "HASH") {
    191               %opts = %{$_[$#_]};
    192               pop @_;
    193           }
    194           die "An #elif without corresponding condition$opts{PLACE}"
    195               if !@preprocessor_conds;
    196           pop @preprocessor_conds;
    197           print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
    198               if $opts{debug};
    199           return (<<"EOF");
    200 #if $1
    201 EOF
    202       },
    203     },
    204     { regexp   => qr/#else/,
    205       massager => sub {
    206           my %opts;
    207           if (ref($_[$#_]) eq "HASH") {
    208               %opts = %{$_[$#_]};
    209               pop @_;
    210           }
    211           die "An #else without corresponding condition$opts{PLACE}"
    212               if !@preprocessor_conds;
    213           # Invert all conditions on the last level
    214           my $stuff = pop @preprocessor_conds;
    215           push @preprocessor_conds, [
    216               map { m|^!(.*)$| ? $1 : '!'.$_ } @$stuff
    217           ];
    218           print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
    219               if $opts{debug};
    220           return ();
    221       },
    222     },
    223     { regexp   => qr/#endif ?/,
    224       massager => sub {
    225           my %opts;
    226           if (ref($_[$#_]) eq "HASH") {
    227               %opts = %{$_[$#_]};
    228               pop @_;
    229           }
    230           die "An #endif without corresponding condition$opts{PLACE}"
    231               if !@preprocessor_conds;
    232           pop @preprocessor_conds;
    233           print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n"
    234               if $opts{debug};
    235           return ();
    236       },
    237     },
    238     { regexp   => qr/#define ([[:alpha:]_]\w*)(<<<\(.*?\)>>>)?( (.*))?/,
    239       massager => sub {
    240           my $name = $1;
    241           my $params = $2;
    242           my $spaceval = $3||"";
    243           my $val = $4||"";
    244           return ("",
    245                   $1, 'M', "", $params ? "$name$params$spaceval" : $val,
    246                   all_conds()); }
    247     },
    248     { regexp   => qr/#.*/,
    249       massager => sub { return (); }
    250     },
    251     );
    252 
    253 my @opensslchandlers = (
    254     ##################################################################
    255     # OpenSSL C specials
    256     #
    257     # They are really preprocessor stuff, but they look like C stuff
    258     # to this parser.  All of these do replacements, anything else is
    259     # an error.
    260 
    261     #####
    262     # Deprecated stuff, by OpenSSL release.
    263 
    264     # OSSL_DEPRECATEDIN_x_y[_z] is simply ignored.  Such declarations are
    265     # supposed to be guarded with an '#ifdef OPENSSL_NO_DEPRECATED_x_y[_z]'
    266     { regexp   => qr/OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?\s+(.*)/,
    267       massager => sub { return $1; },
    268     },
    269     { regexp   => qr/OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?_FOR<<<.*>>>(.*)/,
    270       massager => sub { return $1; },
    271     },
    272     { regexp   => qr/(.*?)\s+OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?\s+(.*)/,
    273       massager => sub { return "$1 $2"; },
    274     },
    275     { regexp   => qr/(.*?)\s+OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?_FOR<<<.*>>>(.*)/,
    276       massager => sub { return "$1 $2"; },
    277     },
    278 
    279     #####
    280     # Core stuff
    281 
    282     # OSSL_CORE_MAKE_FUNC is a macro to create the necessary data and inline
    283     # function the libcrypto<->provider interface
    284     { regexp   => qr/OSSL_CORE_MAKE_FUNC<<<\((.*?),(.*?),(.*?)\)>>>/,
    285       massager => sub {
    286           return (<<"EOF");
    287 typedef $1 OSSL_FUNC_$2_fn$3;
    288 static ossl_inline OSSL_FUNC_$2_fn *OSSL_FUNC_$2(const OSSL_DISPATCH *opf);
    289 EOF
    290       },
    291     },
    292 
    293     #####
    294     # LHASH stuff
    295 
    296     # LHASH_OF(foo) is used as a type, but the chandlers won't take it
    297     # gracefully, so we expand it here.
    298     { regexp   => qr/(.*)\bLHASH_OF<<<\((.*?)\)>>>(.*)/,
    299       massager => sub { return ("$1struct lhash_st_$2$3"); }
    300     },
    301     { regexp   => qr/DEFINE_LHASH_OF(?:_INTERNAL|_EX)?<<<\((.*)\)>>>/,
    302       massager => sub {
    303           return (<<"EOF");
    304 static ossl_inline LHASH_OF($1) * lh_$1_new(unsigned long (*hfn)(const $1 *),
    305                                             int (*cfn)(const $1 *, const $1 *));
    306 static ossl_inline void lh_$1_free(LHASH_OF($1) *lh);
    307 static ossl_inline $1 *lh_$1_insert(LHASH_OF($1) *lh, $1 *d);
    308 static ossl_inline $1 *lh_$1_delete(LHASH_OF($1) *lh, const $1 *d);
    309 static ossl_inline $1 *lh_$1_retrieve(LHASH_OF($1) *lh, const $1 *d);
    310 static ossl_inline int lh_$1_error(LHASH_OF($1) *lh);
    311 static ossl_inline unsigned long lh_$1_num_items(LHASH_OF($1) *lh);
    312 static ossl_inline void lh_$1_node_stats_bio(const LHASH_OF($1) *lh, BIO *out);
    313 static ossl_inline void lh_$1_node_usage_stats_bio(const LHASH_OF($1) *lh,
    314                                                    BIO *out);
    315 static ossl_inline void lh_$1_stats_bio(const LHASH_OF($1) *lh, BIO *out);
    316 static ossl_inline unsigned long lh_$1_get_down_load(LHASH_OF($1) *lh);
    317 static ossl_inline void lh_$1_set_down_load(LHASH_OF($1) *lh, unsigned long dl);
    318 static ossl_inline void lh_$1_doall(LHASH_OF($1) *lh, void (*doall)($1 *));
    319 LHASH_OF($1)
    320 EOF
    321       }
    322      },
    323 
    324     #####
    325     # STACK stuff
    326 
    327     # STACK_OF(foo) is used as a type, but the chandlers won't take it
    328     # gracefully, so we expand it here.
    329     { regexp   => qr/(.*)\bSTACK_OF<<<\((.*?)\)>>>(.*)/,
    330       massager => sub { return ("$1struct stack_st_$2$3"); }
    331     },
    332 #    { regexp   => qr/(.*)\bSTACK_OF\((.*?)\)(.*)/,
    333 #      massager => sub {
    334 #          my $before = $1;
    335 #          my $stack_of = "struct stack_st_$2";
    336 #          my $after = $3;
    337 #          if ($after =~ m|^\w|) { $after = " ".$after; }
    338 #          return ("$before$stack_of$after");
    339 #      }
    340 #    },
    341     { regexp   => qr/SKM_DEFINE_STACK_OF<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
    342       massager => sub {
    343           return (<<"EOF");
    344 STACK_OF($1);
    345 typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b);
    346 typedef void (*sk_$1_freefunc)($3 *a);
    347 typedef $3 * (*sk_$1_copyfunc)(const $3 *a);
    348 static ossl_inline int sk_$1_num(const STACK_OF($1) *sk);
    349 static ossl_inline $2 *sk_$1_value(const STACK_OF($1) *sk, int idx);
    350 static ossl_inline STACK_OF($1) *sk_$1_new(sk_$1_compfunc compare);
    351 static ossl_inline STACK_OF($1) *sk_$1_new_null(void);
    352 static ossl_inline STACK_OF($1) *sk_$1_new_reserve(sk_$1_compfunc compare,
    353                                                    int n);
    354 static ossl_inline int sk_$1_reserve(STACK_OF($1) *sk, int n);
    355 static ossl_inline void sk_$1_free(STACK_OF($1) *sk);
    356 static ossl_inline void sk_$1_zero(STACK_OF($1) *sk);
    357 static ossl_inline $2 *sk_$1_delete(STACK_OF($1) *sk, int i);
    358 static ossl_inline $2 *sk_$1_delete_ptr(STACK_OF($1) *sk, $2 *ptr);
    359 static ossl_inline int sk_$1_push(STACK_OF($1) *sk, $2 *ptr);
    360 static ossl_inline int sk_$1_unshift(STACK_OF($1) *sk, $2 *ptr);
    361 static ossl_inline $2 *sk_$1_pop(STACK_OF($1) *sk);
    362 static ossl_inline $2 *sk_$1_shift(STACK_OF($1) *sk);
    363 static ossl_inline void sk_$1_pop_free(STACK_OF($1) *sk,
    364                                        sk_$1_freefunc freefunc);
    365 static ossl_inline int sk_$1_insert(STACK_OF($1) *sk, $2 *ptr, int idx);
    366 static ossl_inline $2 *sk_$1_set(STACK_OF($1) *sk, int idx, $2 *ptr);
    367 static ossl_inline int sk_$1_find(STACK_OF($1) *sk, $2 *ptr);
    368 static ossl_inline int sk_$1_find_ex(STACK_OF($1) *sk, $2 *ptr);
    369 static ossl_inline void sk_$1_sort(STACK_OF($1) *sk);
    370 static ossl_inline int sk_$1_is_sorted(const STACK_OF($1) *sk);
    371 static ossl_inline STACK_OF($1) * sk_$1_dup(const STACK_OF($1) *sk);
    372 static ossl_inline STACK_OF($1) *sk_$1_deep_copy(const STACK_OF($1) *sk,
    373                                                  sk_$1_copyfunc copyfunc,
    374                                                  sk_$1_freefunc freefunc);
    375 static ossl_inline sk_$1_compfunc sk_$1_set_cmp_func(STACK_OF($1) *sk,
    376                                                      sk_$1_compfunc compare);
    377 EOF
    378       }
    379     },
    380     { regexp   => qr/SKM_DEFINE_STACK_OF_INTERNAL<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
    381       massager => sub {
    382           return (<<"EOF");
    383 STACK_OF($1);
    384 typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b);
    385 typedef void (*sk_$1_freefunc)($3 *a);
    386 typedef $3 * (*sk_$1_copyfunc)(const $3 *a);
    387 static ossl_unused ossl_inline $2 *ossl_check_$1_type($2 *ptr);
    388 static ossl_unused ossl_inline const OPENSSL_STACK *ossl_check_const_$1_sk_type(const STACK_OF($1) *sk);
    389 static ossl_unused ossl_inline OPENSSL_sk_compfunc ossl_check_$1_compfunc_type(sk_$1_compfunc cmp);
    390 static ossl_unused ossl_inline OPENSSL_sk_copyfunc ossl_check_$1_copyfunc_type(sk_$1_copyfunc cpy);
    391 static ossl_unused ossl_inline OPENSSL_sk_freefunc ossl_check_$1_freefunc_type(sk_$1_freefunc fr);
    392 EOF
    393       }
    394     },
    395     { regexp   => qr/DEFINE_SPECIAL_STACK_OF<<<\((.*),\s*(.*)\)>>>/,
    396       massager => sub { return ("SKM_DEFINE_STACK_OF($1,$2,$2)"); },
    397     },
    398     { regexp   => qr/DEFINE_STACK_OF<<<\((.*)\)>>>/,
    399       massager => sub { return ("SKM_DEFINE_STACK_OF($1,$1,$1)"); },
    400     },
    401     { regexp   => qr/DEFINE_SPECIAL_STACK_OF_CONST<<<\((.*),\s*(.*)\)>>>/,
    402       massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $2,$2)"); },
    403     },
    404     { regexp   => qr/DEFINE_STACK_OF_CONST<<<\((.*)\)>>>/,
    405       massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $1,$1)"); },
    406     },
    407 
    408     #####
    409     # ASN1 stuff
    410     { regexp   => qr/DECLARE_ASN1_ITEM<<<\((.*)\)>>>/,
    411       massager => sub {
    412           return (<<"EOF");
    413 const ASN1_ITEM *$1_it(void);
    414 EOF
    415       },
    416     },
    417     { regexp   => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_only<<<\((.*),\s*(.*)\)>>>/,
    418       massager => sub {
    419           return (<<"EOF");
    420 int d2i_$2(void);
    421 int i2d_$2(void);
    422 EOF
    423       },
    424     },
    425     { regexp   => qr/DECLARE_ASN1_ENCODE_FUNCTIONS<<<\((.*),\s*(.*),\s*(.*)\)>>>/,
    426       massager => sub {
    427           return (<<"EOF");
    428 int d2i_$3(void);
    429 int i2d_$3(void);
    430 DECLARE_ASN1_ITEM($2)
    431 EOF
    432       },
    433     },
    434     { regexp   => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
    435       massager => sub {
    436           return (<<"EOF");
    437 int d2i_$2(void);
    438 int i2d_$2(void);
    439 DECLARE_ASN1_ITEM($2)
    440 EOF
    441       },
    442     },
    443     { regexp   => qr/DECLARE_ASN1_ALLOC_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
    444       massager => sub {
    445           return (<<"EOF");
    446 int $2_free(void);
    447 int $2_new(void);
    448 EOF
    449       },
    450     },
    451     { regexp   => qr/DECLARE_ASN1_ALLOC_FUNCTIONS<<<\((.*)\)>>>/,
    452       massager => sub {
    453           return (<<"EOF");
    454 int $1_free(void);
    455 int $1_new(void);
    456 EOF
    457       },
    458     },
    459     { regexp   => qr/DECLARE_ASN1_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/,
    460       massager => sub {
    461           return (<<"EOF");
    462 int d2i_$2(void);
    463 int i2d_$2(void);
    464 int $2_free(void);
    465 int $2_new(void);
    466 DECLARE_ASN1_ITEM($2)
    467 EOF
    468       },
    469     },
    470     { regexp   => qr/DECLARE_ASN1_FUNCTIONS<<<\((.*)\)>>>/,
    471       massager => sub { return (<<"EOF");
    472 int d2i_$1(void);
    473 int i2d_$1(void);
    474 int $1_free(void);
    475 int $1_new(void);
    476 DECLARE_ASN1_ITEM($1)
    477 EOF
    478       }
    479     },
    480     { regexp   => qr/DECLARE_ASN1_NDEF_FUNCTION<<<\((.*)\)>>>/,
    481       massager => sub {
    482           return (<<"EOF");
    483 int i2d_$1_NDEF(void);
    484 EOF
    485       }
    486     },
    487     { regexp   => qr/DECLARE_ASN1_PRINT_FUNCTION<<<\((.*)\)>>>/,
    488       massager => sub {
    489           return (<<"EOF");
    490 int $1_print_ctx(void);
    491 EOF
    492       }
    493     },
    494     { regexp   => qr/DECLARE_ASN1_PRINT_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/,
    495       massager => sub {
    496           return (<<"EOF");
    497 int $2_print_ctx(void);
    498 EOF
    499       }
    500     },
    501     { regexp   => qr/DECLARE_ASN1_SET_OF<<<\((.*)\)>>>/,
    502       massager => sub { return (); }
    503     },
    504     { regexp   => qr/DECLARE_ASN1_DUP_FUNCTION<<<\((.*)\)>>>/,
    505       massager => sub {
    506           return (<<"EOF");
    507 int $1_dup(void);
    508 EOF
    509       }
    510     },
    511     { regexp   => qr/DECLARE_ASN1_DUP_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/,
    512       massager => sub {
    513           return (<<"EOF");
    514 int $2_dup(void);
    515 EOF
    516       }
    517     },
    518     # Universal translator of attributed PEM declarators
    519     { regexp   => qr/
    520           DECLARE_ASN1
    521           (_ENCODE_FUNCTIONS_only|_ENCODE_FUNCTIONS|_ENCODE_FUNCTIONS_name
    522            |_ALLOC_FUNCTIONS_name|_ALLOC_FUNCTIONS|_FUNCTIONS_name|_FUNCTIONS
    523            |_NDEF_FUNCTION|_PRINT_FUNCTION|_PRINT_FUNCTION_name
    524            |_DUP_FUNCTION|_DUP_FUNCTION_name)
    525           _attr
    526           <<<\(\s*OSSL_DEPRECATEDIN_(.*?)\s*,(.*?)\)>>>
    527       /x,
    528       massager => sub { return (<<"EOF");
    529 DECLARE_ASN1$1($3)
    530 EOF
    531       },
    532     },
    533     { regexp   => qr/DECLARE_PKCS12_SET_OF<<<\((.*)\)>>>/,
    534       massager => sub { return (); }
    535     },
    536 
    537     #####
    538     # PEM stuff
    539     { regexp   => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)<<<\((.*?),.*\)>>>/,
    540       massager => sub { return (<<"EOF");
    541 #ifndef OPENSSL_NO_STDIO
    542 int PEM_read_$1(void);
    543 int PEM_write_$1(void);
    544 #endif
    545 int PEM_read_bio_$1(void);
    546 int PEM_write_bio_$1(void);
    547 EOF
    548       },
    549     },
    550     { regexp   => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)_ex<<<\((.*?),.*\)>>>/,
    551       massager => sub { return (<<"EOF");
    552 #ifndef OPENSSL_NO_STDIO
    553 int PEM_read_$1(void);
    554 int PEM_write_$1(void);
    555 int PEM_read_$1_ex(void);
    556 int PEM_write_$1_ex(void);
    557 #endif
    558 int PEM_read_bio_$1(void);
    559 int PEM_write_bio_$1(void);
    560 int PEM_read_bio_$1_ex(void);
    561 int PEM_write_bio_$1_ex(void);
    562 EOF
    563       },
    564     },
    565     { regexp   => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)<<<\((.*?),.*\)>>>/,
    566       massager => sub { return (<<"EOF");
    567 #ifndef OPENSSL_NO_STDIO
    568 int PEM_write_$1(void);
    569 #endif
    570 int PEM_write_bio_$1(void);
    571 EOF
    572       },
    573     },
    574     { regexp   => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)_ex<<<\((.*?),.*\)>>>/,
    575       massager => sub { return (<<"EOF");
    576 #ifndef OPENSSL_NO_STDIO
    577 int PEM_write_$1(void);
    578 int PEM_write_$1_ex(void);
    579 #endif
    580 int PEM_write_bio_$1(void);
    581 int PEM_write_bio_$1_ex(void);
    582 EOF
    583       },
    584     },
    585     { regexp   => qr/DECLARE_PEM(?|_read|_read_cb)<<<\((.*?),.*\)>>>/,
    586       massager => sub { return (<<"EOF");
    587 #ifndef OPENSSL_NO_STDIO
    588 int PEM_read_$1(void);
    589 #endif
    590 int PEM_read_bio_$1(void);
    591 EOF
    592       },
    593     },
    594     { regexp   => qr/DECLARE_PEM(?|_read|_read_cb)_ex<<<\((.*?),.*\)>>>/,
    595       massager => sub { return (<<"EOF");
    596 #ifndef OPENSSL_NO_STDIO
    597 int PEM_read_$1(void);
    598 int PEM_read_$1_ex(void);
    599 #endif
    600 int PEM_read_bio_$1(void);
    601 int PEM_read_bio_$1_ex(void);
    602 EOF
    603       },
    604     },
    605     # Universal translator of attributed PEM declarators
    606     { regexp   => qr/
    607           DECLARE_PEM
    608           ((?:_rw|_rw_cb|_rw_const|_write|_write_cb|_write_const|_read|_read_cb)
    609            (?:_ex)?)
    610           _attr
    611           <<<\(\s*OSSL_DEPRECATEDIN_(.*?)\s*,(.*?)\)>>>
    612       /x,
    613       massager => sub { return (<<"EOF");
    614 DECLARE_PEM$1($3)
    615 EOF
    616       },
    617     },
    618 
    619     # OpenSSL's declaration of externs with possible export linkage
    620     # (really only relevant on Windows)
    621     { regexp   => qr/OPENSSL_(?:EXPORT|EXTERN)/,
    622       massager => sub { return ("extern"); }
    623     },
    624 
    625     # Spurious stuff found in the OpenSSL headers
    626     # Usually, these are just macros that expand to, well, something
    627     { regexp   => qr/__NDK_FPABI__/,
    628       massager => sub { return (); }
    629     },
    630     );
    631 
    632 my $anoncnt = 0;
    633 
    634 my @chandlers = (
    635     ##################################################################
    636     # C stuff
    637 
    638     # extern "C" of individual items
    639     # Note that the main parse function has a special hack for 'extern "C" {'
    640     # which can't be done in handlers
    641     # We simply ignore it.
    642     { regexp   => qr/^extern "C" (.*(?:;|>>>))/,
    643       massager => sub { return ($1); },
    644     },
    645     # any other extern is just ignored
    646     { regexp   => qr/^\s*                       # Any spaces before
    647                      extern                     # The keyword we look for
    648                      \b                         # word to non-word boundary
    649                      .*                         # Anything after
    650                      ;
    651                     /x,
    652       massager => sub { return (); },
    653     },
    654     # union, struct and enum definitions
    655     # Because this one might appear a little everywhere within type
    656     # definitions, we take it out and replace it with just
    657     # 'union|struct|enum name' while registering it.
    658     # This makes use of the parser trick to surround the outer braces
    659     # with <<< and >>>
    660     { regexp   => qr/(.*)                       # Anything before       ($1)
    661                      \b                         # word to non-word boundary
    662                      (union|struct|enum)        # The word used         ($2)
    663                      (?:\s([[:alpha:]_]\w*))?   # Struct or enum name   ($3)
    664                      <<<(\{.*?\})>>>            # Struct or enum definition ($4)
    665                      (.*)                       # Anything after        ($5)
    666                      ;
    667                     /x,
    668       massager => sub {
    669           my $before = $1;
    670           my $word = $2;
    671           my $name = $3
    672               || sprintf("__anon%03d", ++$anoncnt); # Anonymous struct
    673           my $definition = $4;
    674           my $after = $5;
    675           my $type = $word eq "struct" ? 'S' : 'E';
    676           if ($before ne "" || $after ne ";") {
    677               if ($after =~ m|^\w|) { $after = " ".$after; }
    678               return ("$before$word $name$after;",
    679                       "$word $name", $type, "", "$word$definition", all_conds());
    680           }
    681           # If there was no before nor after, make the return much simple
    682           return ("", "$word $name", $type, "", "$word$definition", all_conds());
    683       }
    684     },
    685     # Named struct and enum forward declarations
    686     # We really just ignore them, but we need to parse them or the variable
    687     # declaration handler further down will think it's a variable declaration.
    688     { regexp   => qr/^(union|struct|enum) ([[:alpha:]_]\w*);/,
    689       massager => sub { return (); }
    690     },
    691     # Function returning function pointer declaration
    692     # This sort of declaration may have a body (inline functions, for example)
    693     { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
    694                      ((?:\w|\*|\s)*?)           # Return type           ($2)
    695                      \s?                        # Possible space
    696                      <<<\(\*
    697                      ([[:alpha:]_]\w*)          # Function name         ($3)
    698                      (\(.*\))                   # Parameters            ($4)
    699                      \)>>>
    700                      <<<(\(.*\))>>>             # F.p. parameters       ($5)
    701                      (?:<<<\{.*\}>>>|;)         # Body or semicolon
    702                     /x,
    703       massager => sub {
    704           return ("", $3, 'T', "", "$2(*$4)$5", all_conds())
    705               if defined $1;
    706           return ("", $3, 'F', "$2(*)$5", "$2(*$4)$5", all_conds()); }
    707     },
    708     # Function pointer declaration, or typedef thereof
    709     # This sort of declaration never has a function body
    710     { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
    711                      ((?:\w|\*|\s)*?)           # Return type           ($2)
    712                      <<<\(\*([[:alpha:]_]\w*)\)>>> # T.d. or var name   ($3)
    713                      <<<(\(.*\))>>>             # F.p. parameters       ($4)
    714                      ;
    715                     /x,
    716       massager => sub {
    717           return ("", $3, 'T', "", "$2(*)$4", all_conds())
    718               if defined $1;
    719           return ("", $3, 'V', "$2(*)$4", "$2(*)$4", all_conds());
    720       },
    721     },
    722     # Function declaration, or typedef thereof
    723     # This sort of declaration may have a body (inline functions, for example)
    724     { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
    725                      ((?:\w|\*|\s)*?)           # Return type           ($2)
    726                      \s?                        # Possible space
    727                      ([[:alpha:]_]\w*)          # Function name         ($3)
    728                      <<<(\(.*\))>>>             # Parameters            ($4)
    729                      (?:<<<\{.*\}>>>|;)         # Body or semicolon
    730                     /x,
    731       massager => sub {
    732           return ("", $3, 'T', "", "$2$4", all_conds())
    733               if defined $1;
    734           return ("", $3, 'F', $2, "$2$4", all_conds());
    735       },
    736     },
    737     # Variable declaration, including arrays, or typedef thereof
    738     { regexp   => qr/(?:(typedef)\s?)?          # Possible typedef      ($1)
    739                      ((?:\w|\*|\s)*?)           # Type                  ($2)
    740                      \s?                        # Possible space
    741                      ([[:alpha:]_]\w*)          # Variable name         ($3)
    742                      ((?:<<<\[[^\]]*\]>>>)*)    # Possible array declaration ($4)
    743                      ;
    744                     /x,
    745       massager => sub {
    746           return ("", $3, 'T', "", $2.($4||""), all_conds())
    747               if defined $1;
    748           return ("", $3, 'V', $2.($4||""), $2.($4||""), all_conds());
    749       },
    750     },
    751 );
    752 
    753 # End handlers are almost the same as handlers, except they are run through
    754 # ONCE when the input has been parsed through.  These are used to check for
    755 # remaining stuff, such as an unfinished #ifdef and stuff like that that the
    756 # main parser can't check on its own.
    757 my @endhandlers = (
    758     { massager => sub {
    759         my %opts = %{$_[0]};
    760 
    761         die "Unfinished preprocessor conditions levels: ",scalar(@preprocessor_conds),($opts{filename} ? " in file ".$opts{filename}: ""),$opts{PLACE}
    762             if @preprocessor_conds;
    763       }
    764     }
    765     );
    766 
    767 # takes a list of strings that can each contain one or several lines of code
    768 # also takes a hash of options as last argument.
    769 #
    770 # returns a list of hashes with information:
    771 #
    772 #       name            name of the thing
    773 #       type            type, see the massage handler function
    774 #       returntype      return type of functions and variables
    775 #       value           value for macros, signature for functions, variables
    776 #                       and structs
    777 #       conds           preprocessor conditions (array ref)
    778 
    779 sub parse {
    780     my %opts;
    781     if (ref($_[$#_]) eq "HASH") {
    782         %opts = %{$_[$#_]};
    783         pop @_;
    784     }
    785     my %state = (
    786         in_extern_C => 0,       # An exception to parenthesis processing.
    787         cpp_parens => [],       # A list of ending parens and braces found in
    788                                 # preprocessor directives
    789         c_parens => [],         # A list of ending parens and braces found in
    790                                 # C statements
    791         in_string => "",        # empty string when outside a string, otherwise
    792                                 # "'" or '"' depending on the starting quote.
    793         in_comment => "",       # empty string when outside a comment, otherwise
    794                                 # "/*" or "//" depending on the type of comment
    795                                 # found.  The latter will never be multiline
    796                                 # NOTE: in_string and in_comment will never be
    797                                 # true (in perl semantics) at the same time.
    798         current_line => 0,
    799         );
    800     my @result = ();
    801     my $normalized_line = "";   # $input_line, but normalized.  In essence, this
    802                                 # means that ALL whitespace is removed unless
    803                                 # it absolutely has to be present, and in that
    804                                 # case, there's only one space.
    805                                 # The cases where a space needs to stay present
    806                                 # are:
    807                                 # 1. between words
    808                                 # 2. between words and number
    809                                 # 3. after the first word of a preprocessor
    810                                 #    directive.
    811                                 # 4. for the #define directive, between the macro
    812                                 #    name/args and its value, so we end up with:
    813                                 #       #define FOO val
    814                                 #       #define BAR(x) something(x)
    815     my $collected_stmt = "";    # Where we're building up a C line until it's a
    816                                 # complete definition/declaration, as determined
    817                                 # by any handler being capable of matching it.
    818 
    819     # We use $_ shamelessly when looking through @lines.
    820     # In case we find a \ at the end, we keep filling it up with more lines.
    821     $_ = undef;
    822 
    823     foreach my $line (@_) {
    824         # split tries to be smart when a string ends with the thing we split on
    825         $line .= "\n" unless $line =~ m|\R$|;
    826         $line .= "#";
    827 
    828         # We use undef as a marker for a new line from the file.
    829         # Since we convert one line to several and unshift that into @lines,
    830         # that's the only safe way we have to track the original lines
    831         my @lines = map { ( undef, $_ ) } split m|\R|, $line;
    832 
    833         # Remember that extra # we added above?  Now we remove it
    834         pop @lines;
    835         pop @lines;             # Don't forget the undef
    836 
    837         while (@lines) {
    838             if (!defined($lines[0])) {
    839                 shift @lines;
    840                 $state{current_line}++;
    841                 if (!defined($_)) {
    842                     $opts{PLACE} = " at ".$opts{filename}." line ".$state{current_line}."\n";
    843                     $opts{PLACE2} = $opts{filename}.":".$state{current_line};
    844                 }
    845                 next;
    846             }
    847 
    848             $_ = "" unless defined $_;
    849             $_ .= shift @lines;
    850 
    851             if (m|\\$|) {
    852                 $_ = $`;
    853                 next;
    854             }
    855 
    856             if ($opts{debug}) {
    857                 print STDERR "DEBUG:----------------------------\n";
    858                 print STDERR "DEBUG: \$_      = '$_'\n";
    859             }
    860 
    861             ##########################################################
    862             # Now that we have a full line, let's process through it
    863             while(1) {
    864                 unless ($state{in_comment}) {
    865                     # Begin with checking if the current $normalized_line
    866                     # contains a preprocessor directive
    867                     # This is only done if we're not inside a comment and
    868                     # if it's a preprocessor directive and it's finished.
    869                     if ($normalized_line =~ m|^#| && $_ eq "") {
    870                         print STDERR "DEBUG[OPENSSL CPP]: \$normalized_line = '$normalized_line'\n"
    871                             if $opts{debug};
    872                         $opts{debug_type} = "OPENSSL CPP";
    873                         my @r = ( _run_handlers($normalized_line,
    874                                                 @opensslcpphandlers,
    875                                                 \%opts) );
    876                         if (shift @r) {
    877                             # Checking if there are lines to inject.
    878                             if (@r) {
    879                                 @r = split $/, (pop @r).$_;
    880                                 print STDERR "DEBUG[OPENSSL CPP]: injecting '", join("', '", @r),"'\n"
    881                                     if $opts{debug} && @r;
    882                                 @lines = ( @r, @lines );
    883 
    884                                 $_ = "";
    885                             }
    886                         } else {
    887                             print STDERR "DEBUG[CPP]: \$normalized_line = '$normalized_line'\n"
    888                                 if $opts{debug};
    889                             $opts{debug_type} = "CPP";
    890                             my @r = ( _run_handlers($normalized_line,
    891                                                     @cpphandlers,
    892                                                     \%opts) );
    893                             if (shift @r) {
    894                                 if (ref($r[0]) eq "HASH") {
    895                                     push @result, shift @r;
    896                                 }
    897 
    898                                 # Now, check if there are lines to inject.
    899                                 # Really, this should never happen, it IS a
    900                                 # preprocessor directive after all...
    901                                 if (@r) {
    902                                     @r = split $/, pop @r;
    903                                     print STDERR "DEBUG[CPP]: injecting '", join("', '", @r),"'\n"
    904                                     if $opts{debug} && @r;
    905                                     @lines = ( @r, @lines );
    906                                     $_ = "";
    907                                 }
    908                             }
    909                         }
    910 
    911                         # Note: we simply ignore all directives that no
    912                         # handler matches
    913                         $normalized_line = "";
    914                     }
    915 
    916                     # If the two strings end and start with a character that
    917                     # shouldn't get concatenated, add a space
    918                     my $space =
    919                         ($collected_stmt =~ m/(?:"|')$/
    920                          || ($collected_stmt =~ m/(?:\w|\d)$/
    921                              && $normalized_line =~ m/^(?:\w|\d)/)) ? " " : "";
    922 
    923                     # Now, unless we're building up a preprocessor directive or
    924                     # are in the middle of a string, or the parens et al aren't
    925                     # balanced up yet, let's try and see if there's a OpenSSL
    926                     # or C handler that can make sense of what we have so far.
    927                     if ( $normalized_line !~ m|^#|
    928                          && ($collected_stmt ne "" || $normalized_line ne "")
    929                          && ! @{$state{c_parens}}
    930                          && ! $state{in_string} ) {
    931                         if ($opts{debug}) {
    932                             print STDERR "DEBUG[OPENSSL C]: \$collected_stmt  = '$collected_stmt'\n";
    933                             print STDERR "DEBUG[OPENSSL C]: \$normalized_line = '$normalized_line'\n";
    934                         }
    935                         $opts{debug_type} = "OPENSSL C";
    936                         my @r = ( _run_handlers($collected_stmt
    937                                                     .$space
    938                                                     .$normalized_line,
    939                                                 @opensslchandlers,
    940                                                 \%opts) );
    941                         if (shift @r) {
    942                             # Checking if there are lines to inject.
    943                             if (@r) {
    944                                 @r = split $/, (pop @r).$_;
    945                                 print STDERR "DEBUG[OPENSSL]: injecting '", join("', '", @r),"'\n"
    946                                     if $opts{debug} && @r;
    947                                 @lines = ( @r, @lines );
    948 
    949                                 $_ = "";
    950                             }
    951                             $normalized_line = "";
    952                             $collected_stmt = "";
    953                         } else {
    954                             if ($opts{debug}) {
    955                                 print STDERR "DEBUG[C]: \$collected_stmt  = '$collected_stmt'\n";
    956                                 print STDERR "DEBUG[C]: \$normalized_line = '$normalized_line'\n";
    957                             }
    958                             $opts{debug_type} = "C";
    959                             my @r = ( _run_handlers($collected_stmt
    960                                                         .$space
    961                                                         .$normalized_line,
    962                                                     @chandlers,
    963                                                     \%opts) );
    964                             if (shift @r) {
    965                                 if (ref($r[0]) eq "HASH") {
    966                                     push @result, shift @r;
    967                                 }
    968 
    969                                 # Checking if there are lines to inject.
    970                                 if (@r) {
    971                                     @r = split $/, (pop @r).$_;
    972                                     print STDERR "DEBUG[C]: injecting '", join("', '", @r),"'\n"
    973                                         if $opts{debug} && @r;
    974                                     @lines = ( @r, @lines );
    975 
    976                                     $_ = "";
    977                                 }
    978                                 $normalized_line = "";
    979                                 $collected_stmt = "";
    980                             }
    981                         }
    982                     }
    983                     if ($_ eq "") {
    984                         $collected_stmt .= $space.$normalized_line;
    985                         $normalized_line = "";
    986                     }
    987                 }
    988 
    989                 if ($_ eq "") {
    990                     $_ = undef;
    991                     last;
    992                 }
    993 
    994                 # Take care of inside string first.
    995                 if ($state{in_string}) {
    996                     if (m/ (?:^|(?<!\\))        # Make sure it's not escaped
    997                            $state{in_string}    # Look for matching quote
    998                          /x) {
    999                         $normalized_line .= $`.$&;
   1000                         $state{in_string} = "";
   1001                         $_ = $';
   1002                         next;
   1003                     } else {
   1004                         die "Unfinished string without continuation found$opts{PLACE}\n";
   1005                     }
   1006                 }
   1007                 # ... or inside comments, whichever happens to apply
   1008                 elsif ($state{in_comment}) {
   1009 
   1010                     # This should never happen
   1011                     die "Something went seriously wrong, multiline //???$opts{PLACE}\n"
   1012                         if ($state{in_comment} eq "//");
   1013 
   1014                     # A note: comments are simply discarded.
   1015 
   1016                     if (m/ (?:^|(?<!\\))        # Make sure it's not escaped
   1017                            \*\/                 # Look for C comment end
   1018                          /x) {
   1019                         $state{in_comment} = "";
   1020                         $_ = $';
   1021                         print STDERR "DEBUG: Found end of comment, followed by '$_'\n"
   1022                             if $opts{debug};
   1023                         next;
   1024                     } else {
   1025                         $_ = "";
   1026                         next;
   1027                     }
   1028                 }
   1029 
   1030                 # At this point, it's safe to remove leading whites, but
   1031                 # we need to be careful with some preprocessor lines
   1032                 if (m|^\s+|) {
   1033                     my $rest = $';
   1034                     my $space = "";
   1035                     $space = " "
   1036                         if ($normalized_line =~ m/^
   1037                                                   \#define\s\w(?:\w|\d)*(?:<<<\([^\)]*\)>>>)?
   1038                                                   | \#[a-z]+
   1039                                                   $/x);
   1040                     print STDERR "DEBUG: Processing leading spaces: \$normalized_line = '$normalized_line', \$space = '$space', \$rest = '$rest'\n"
   1041                         if $opts{debug};
   1042                     $_ = $space.$rest;
   1043                 }
   1044 
   1045                 my $parens =
   1046                     $normalized_line =~ m|^#| ? 'cpp_parens' : 'c_parens';
   1047                 (my $paren_singular = $parens) =~ s|s$||;
   1048 
   1049                 # Now check for specific tokens, and if they are parens,
   1050                 # check them against $state{$parens}.  Note that we surround
   1051                 # the outermost parens with extra "<<<" and ">>>".  Those
   1052                 # are for the benefit of handlers who to need to detect
   1053                 # them, and they will be removed from the final output.
   1054                 if (m|^[\{\[\(]|) {
   1055                     my $body = $&;
   1056                     $_ = $';
   1057                     if (!@{$state{$parens}}) {
   1058                         if ("$normalized_line$body" =~ m|^extern "C"\{$|) {
   1059                             $state{in_extern_C} = 1;
   1060                             print STDERR "DEBUG: found start of 'extern \"C\"' ($normalized_line$body)\n"
   1061                                 if $opts{debug};
   1062                             $normalized_line = "";
   1063                         } else {
   1064                             $normalized_line .= "<<<".$body;
   1065                         }
   1066                     } else {
   1067                         $normalized_line .= $body;
   1068                     }
   1069 
   1070                     if ($normalized_line ne "") {
   1071                         print STDERR "DEBUG: found $paren_singular start '$body'\n"
   1072                             if $opts{debug};
   1073                         $body =~ tr|\{\[\(|\}\]\)|;
   1074                         print STDERR "DEBUG: pushing $paren_singular end '$body'\n"
   1075                             if $opts{debug};
   1076                         push @{$state{$parens}}, $body;
   1077                     }
   1078                 } elsif (m|^[\}\]\)]|) {
   1079                     $_ = $';
   1080 
   1081                     if (!@{$state{$parens}}
   1082                         && $& eq '}' && $state{in_extern_C}) {
   1083                         print STDERR "DEBUG: found end of 'extern \"C\"'\n"
   1084                             if $opts{debug};
   1085                         $state{in_extern_C} = 0;
   1086                     } else {
   1087                         print STDERR "DEBUG: Trying to match '$&' against '"
   1088                             ,join("', '", @{$state{$parens}})
   1089                             ,"'\n"
   1090                             if $opts{debug};
   1091                         die "Unmatched parentheses$opts{PLACE}\n"
   1092                             unless (@{$state{$parens}}
   1093                                     && pop @{$state{$parens}} eq $&);
   1094                         if (!@{$state{$parens}}) {
   1095                             $normalized_line .= $&.">>>";
   1096                         } else {
   1097                             $normalized_line .= $&;
   1098                         }
   1099                     }
   1100                 } elsif (m|^["']|) { # string start
   1101                     my $body = $&;
   1102                     $_ = $';
   1103 
   1104                     # We want to separate strings from \w and \d with one space.
   1105                     $normalized_line .= " " if $normalized_line =~ m/(\w|\d)$/;
   1106                     $normalized_line .= $body;
   1107                     $state{in_string} = $body;
   1108                 } elsif (m|^\/\*|) { # C style comment
   1109                     print STDERR "DEBUG: found start of C style comment\n"
   1110                         if $opts{debug};
   1111                     $state{in_comment} = $&;
   1112                     $_ = $';
   1113                 } elsif (m|^\/\/|) { # C++ style comment
   1114                     print STDERR "DEBUG: found C++ style comment\n"
   1115                         if $opts{debug};
   1116                     $_ = "";    # (just discard it entirely)
   1117                 } elsif (m/^ (?| (?: 0[xX][[:xdigit:]]+ | 0[bB][01]+ | [0-9]+ )
   1118                                  (?i: U | L | UL | LL | ULL )?
   1119                                | [0-9]+\.[0-9]+(?:[eE][\-\+]\d+)? (?i: F | L)?
   1120                                ) /x) {
   1121                     print STDERR "DEBUG: Processing numbers: \$normalized_line = '$normalized_line', \$& = '$&', \$' = '$''\n"
   1122                         if $opts{debug};
   1123                     $normalized_line .= $&;
   1124                     $_ = $';
   1125                 } elsif (m/^[[:alpha:]_]\w*/) {
   1126                     my $body = $&;
   1127                     my $rest = $';
   1128                     my $space = "";
   1129 
   1130                     # Now, only add a space if it's needed to separate
   1131                     # two \w characters, and we also surround strings with
   1132                     # a space.  In this case, that's if $normalized_line ends
   1133                     # with a \w, \d, " or '.
   1134                     $space = " "
   1135                         if ($normalized_line =~ m/("|')$/
   1136                             || ($normalized_line =~ m/(\w|\d)$/
   1137                                 && $body =~ m/^(\w|\d)/));
   1138 
   1139                     print STDERR "DEBUG: Processing words: \$normalized_line = '$normalized_line', \$space = '$space', \$body = '$body', \$rest = '$rest'\n"
   1140                         if $opts{debug};
   1141                     $normalized_line .= $space.$body;
   1142                     $_ = $rest;
   1143                 } elsif (m|^(?:\\)?.|) { # Catch-all
   1144                     $normalized_line .= $&;
   1145                     $_ = $';
   1146                 }
   1147             }
   1148         }
   1149     }
   1150     foreach my $handler (@endhandlers) {
   1151         if ($handler->{massager}) {
   1152             $handler->{massager}->(\%opts);
   1153         }
   1154     }
   1155     return @result;
   1156 }
   1157 
   1158 # arg1:    line to check
   1159 # arg2...: handlers to check
   1160 # return undef when no handler matched
   1161 sub _run_handlers {
   1162     my %opts;
   1163     if (ref($_[$#_]) eq "HASH") {
   1164         %opts = %{$_[$#_]};
   1165         pop @_;
   1166     }
   1167     my $line = shift;
   1168     my @handlers = @_;
   1169 
   1170     foreach my $handler (@handlers) {
   1171         if ($handler->{regexp}
   1172             && $line =~ m|^$handler->{regexp}$|) {
   1173             if ($handler->{massager}) {
   1174                 if ($opts{debug}) {
   1175                     print STDERR "DEBUG[",$opts{debug_type},"]: Trying to handle '$line'\n";
   1176                     print STDERR "DEBUG[",$opts{debug_type},"]: (matches /\^",$handler->{regexp},"\$/)\n";
   1177                 }
   1178                 my $saved_line = $line;
   1179                 my @massaged =
   1180                     map { s/(<<<|>>>)//g; $_ }
   1181                     $handler->{massager}->($saved_line, \%opts);
   1182                 print STDERR "DEBUG[",$opts{debug_type},"]: Got back '"
   1183                     , join("', '", @massaged), "'\n"
   1184                     if $opts{debug};
   1185 
   1186                 # Because we may get back new lines to be
   1187                 # injected before whatever else that follows,
   1188                 # and the injected stuff might include
   1189                 # preprocessor lines, we need to inject them
   1190                 # in @lines and set $_ to the empty string to
   1191                 # break out from the inner loops
   1192                 my $injected_lines = shift @massaged || "";
   1193 
   1194                 if (@massaged) {
   1195                     return (1,
   1196                             {
   1197                                 name    => shift @massaged,
   1198                                 type    => shift @massaged,
   1199                                 returntype => shift @massaged,
   1200                                 value   => shift @massaged,
   1201                                 conds   => [ @massaged ]
   1202                             },
   1203                             $injected_lines
   1204                         );
   1205                 } else {
   1206                     print STDERR "DEBUG[",$opts{debug_type},"]:   (ignore, possible side effects)\n"
   1207                         if $opts{debug} && $injected_lines eq "";
   1208                     return (1, $injected_lines);
   1209                 }
   1210             }
   1211             return (1);
   1212         }
   1213     }
   1214     return (0);
   1215 }
   1216