Home | History | Annotate | Line # | Download | only in misc
cvs2cl.pl revision 1.1
      1 #!/bin/sh
      2 exec perl -w -x "$0" ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
      3 #!perl -w
      4 
      5 
      6 ##############################################################
      7 ###                                                        ###
      8 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
      9 ###                                                        ###
     10 ##############################################################
     11 
     12 ## $Revision: 1.1 $
     13 ## $Date: 2009/01/20 14:36:08 $
     14 ## $Author: tteras $
     15 ##
     16 
     17 use strict;
     18 
     19 use File::Basename qw( fileparse );
     20 use Getopt::Long   qw( GetOptions );
     21 use Text::Wrap     qw( );
     22 use Time::Local    qw( timegm );
     23 use User::pwent    qw( getpwnam );
     24 
     25 # The Plan:
     26 #
     27 # Read in the logs for multiple files, spit out a nice ChangeLog that
     28 # mirrors the information entered during `cvs commit'.
     29 #
     30 # The problem presents some challenges. In an ideal world, we could
     31 # detect files with the same author, log message, and checkin time --
     32 # each <filelist, author, time, logmessage> would be a changelog entry.
     33 # We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
     34 # so checkins can span a range of times.  Also, the directory structure
     35 # could be hierarchical.
     36 #
     37 # Another question is whether we really want to have the ChangeLog
     38 # exactly reflect commits. An author could issue two related commits,
     39 # with different log entries, reflecting a single logical change to the
     40 # source. GNU style ChangeLogs group these under a single author/date.
     41 # We try to do the same.
     42 #
     43 # So, we parse the output of `cvs log', storing log messages in a
     44 # multilevel hash that stores the mapping:
     45 #   directory => author => time => message => filelist
     46 # As we go, we notice "nearby" commit times and store them together
     47 # (i.e., under the same timestamp), so they appear in the same log
     48 # entry.
     49 #
     50 # When we've read all the logs, we twist this mapping into
     51 # a time => author => message => filelist mapping for each directory.
     52 #
     53 # If we're not using the `--distributed' flag, the directory is always
     54 # considered to be `./', even as descend into subdirectories.
     55 
     56 # Call Tree
     57 
     58 # name                         number of lines (10.xii.03)
     59 # parse_options                         192
     60 # derive_changelog                       13
     61 # +-maybe_grab_accumulation_date         38
     62 # +-read_changelog                      277
     63 #   +-maybe_read_user_map_file           94
     64 #     +-run_ext                           9
     65 #   +-read_file_path                     29
     66 #   +-read_symbolic_name                 43
     67 #   +-read_revision                      49
     68 #   +-read_date_author_and_state         25
     69 #     +-parse_date_author_and_state      20
     70 #   +-read_branches                      36
     71 # +-output_changelog                    424
     72 #   +-pretty_file_list                  290
     73 #     +-common_path_prefix               35
     74 #   +-preprocess_msg_text                30
     75 #     +-min                               1
     76 #   +-mywrap                             16
     77 #   +-last_line_len                       5
     78 #   +-wrap_log_entry                    177
     79 #
     80 # Utilities
     81 #
     82 # xml_escape                              6
     83 # slurp_file                             11
     84 # debug                                   5
     85 # version                                 2
     86 # usage                                 142
     87 
     88 # -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
     89 #
     90 # Note about a bug-slash-opportunity:
     91 # -----------------------------------
     92 #
     93 # There's a bug in Text::Wrap, which affects cvs2cl.  This script
     94 # reveals it:
     95 #
     96 #   #!/usr/bin/perl -w
     97 #
     98 #   use Text::Wrap;
     99 #
    100 #   my $test_text =
    101 #   "This script demonstrates a bug in Text::Wrap.  The very long line
    102 #   following this paragraph will be relocated relative to the surrounding
    103 #   text:
    104 #
    105 #   ====================================================================
    106 #
    107 #   See?  When the bug happens, we'll get the line of equal signs below
    108 #   this paragraph, even though it should be above.";
    109 #
    110 #
    111 #   # Print out the test text with no wrapping:
    112 #   print "$test_text";
    113 #   print "\n";
    114 #   print "\n";
    115 #
    116 #   # Now print it out wrapped, and see the bug:
    117 #   print wrap ("\t", "        ", "$test_text");
    118 #   print "\n";
    119 #   print "\n";
    120 #
    121 # If the line of equal signs were one shorter, then the bug doesn't
    122 # happen.  Interesting.
    123 #
    124 # Anyway, rather than fix this in Text::Wrap, we might as well write a
    125 # new wrap() which has the following much-needed features:
    126 #
    127 # * initial indentation, like current Text::Wrap()
    128 # * subsequent line indentation, like current Text::Wrap()
    129 # * user chooses among: force-break long words, leave them alone, or die()?
    130 # * preserve existing indentation: chopped chunks from an indented line
    131 #   are indented by same (like this line, not counting the asterisk!)
    132 # * optional list of things to preserve on line starts, default ">"
    133 #
    134 # Note that the last two are essentially the same concept, so unify in
    135 # implementation and give a good interface to controlling them.
    136 #
    137 # And how about:
    138 #
    139 # Optionally, when encounter a line pre-indented by same as previous
    140 # line, then strip the newline and refill, but indent by the same.
    141 # Yeah...
    142 
    143 # Globals --------------------------------------------------------------------
    144 
    145 # In case we have to print it out:
    146 my $VERSION = '$Revision: 1.1 $';
    147 $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
    148 
    149 ## Vars set by options:
    150 
    151 # Print debugging messages?
    152 my $Debug = 0;
    153 
    154 # Just show version and exit?
    155 my $Print_Version = 0;
    156 
    157 # Just print usage message and exit?
    158 my $Print_Usage = 0;
    159 
    160 # What file should we generate (defaults to "ChangeLog")?
    161 my $Log_File_Name = "ChangeLog";
    162 
    163 # Grab most recent entry date from existing ChangeLog file, just add
    164 # to that ChangeLog.
    165 my $Cumulative = 0;
    166 
    167 # `cvs log -d`, this will repeat the last entry in the old log.  This is OK,
    168 # as it guarantees at least one entry in the update changelog, which means
    169 # that there will always be a date to extract for the next update.  The repeat
    170 # entry can be removed in postprocessing, if necessary.
    171 
    172 # MJP 2003-08-02
    173 # I don't think this actually does anything useful
    174 my $Update = 0;
    175 
    176 # Expand usernames to email addresses based on a map file?
    177 my $User_Map_File = '';
    178 my $User_Passwd_File;
    179 my $Mail_Domain;
    180 
    181 # Output log in chronological order? [default is reverse chronological order]
    182 my $Chronological_Order = 0;
    183 
    184 # Grab user details via gecos
    185 my $Gecos = 0;
    186 
    187 # User domain for gecos email addresses
    188 my $Domain;
    189 
    190 # Output to a file or to stdout?
    191 my $Output_To_Stdout = 0;
    192 
    193 # Eliminate empty log messages?
    194 my $Prune_Empty_Msgs = 0;
    195 
    196 # Tags of which not to output
    197 my %ignore_tags;
    198 
    199 # Show only revisions with Tags
    200 my %show_tags;
    201 
    202 # Don't call Text::Wrap on the body of the message
    203 my $No_Wrap = 0;
    204 
    205 # Indentation of log messages
    206 my $Indent = "\t";
    207 
    208 # Don't do any pretty print processing
    209 my $Summary = 0;
    210 
    211 # Separates header from log message.  Code assumes it is either " " or
    212 # "\n\n", so if there's ever an option to set it to something else,
    213 # make sure to go through all conditionals that use this var.
    214 my $After_Header = " ";
    215 
    216 # XML Encoding
    217 my $XML_Encoding = '';
    218 
    219 # Format more for programs than for humans.
    220 my $XML_Output = 0;
    221 my $No_XML_Namespace = 0;
    222 my $No_XML_ISO_Date = 0;
    223 
    224 # Do some special tweaks for log data that was written in FSF
    225 # ChangeLog style.
    226 my $FSF_Style = 0;
    227 
    228 # Set iff output should be like an FSF-style ChangeLog.
    229 my $FSF_Output = 0;
    230 
    231 # Show times in UTC instead of local time
    232 my $UTC_Times = 0;
    233 
    234 # Show times in output?
    235 my $Show_Times = 1;
    236 
    237 # Show day of week in output?
    238 my $Show_Day_Of_Week = 0;
    239 
    240 # Show revision numbers in output?
    241 my $Show_Revisions = 0;
    242 
    243 # Show dead files in output?
    244 my $Show_Dead = 0;
    245 
    246 # Hide dead trunk files which were created as a result of additions on a
    247 # branch?
    248 my $Hide_Branch_Additions = 1;
    249 
    250 # Show tags (symbolic names) in output?
    251 my $Show_Tags = 0;
    252 
    253 # Show tags separately in output?
    254 my $Show_Tag_Dates = 0;
    255 
    256 # Show branches by symbolic name in output?
    257 my $Show_Branches = 0;
    258 
    259 # Show only revisions on these branches or their ancestors.
    260 my @Follow_Branches;
    261 # Show only revisions on these branches or their ancestors; ignore descendent
    262 # branches.
    263 my @Follow_Only;
    264 
    265 # Don't bother with files matching this regexp.
    266 my @Ignore_Files;
    267 
    268 # How exactly we match entries.  We definitely want "o",
    269 # and user might add "i" by using --case-insensitive option.
    270 my $Case_Insensitive = 0;
    271 
    272 # Maybe only show log messages matching a certain regular expression.
    273 my $Regexp_Gate = '';
    274 
    275 # Show tags only matching certain regular expression.
    276 my $Regexp_Tag = '';
    277 
    278 # Pass this global option string along to cvs, to the left of `log':
    279 my $Global_Opts = '';
    280 
    281 # Pass this option string along to the cvs log subcommand:
    282 my $Command_Opts = '';
    283 
    284 # Read log output from stdin instead of invoking cvs log?
    285 my $Input_From_Stdin = 0;
    286 
    287 # Don't show filenames in output.
    288 my $Hide_Filenames = 0;
    289 
    290 # Don't shorten directory names from filenames.
    291 my $Common_Dir = 1;
    292 
    293 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
    294 # times that span a range of time. We assume that checkins will last no
    295 # longer than $Max_Checkin_Duration seconds, and that similarly, no
    296 # checkins will happen from the same users with the same message less
    297 # than $Max_Checkin_Duration seconds apart.
    298 my $Max_Checkin_Duration = 180;
    299 
    300 # What to put at the front of [each] ChangeLog.
    301 my $ChangeLog_Header = '';
    302 
    303 # Whether to enable 'delta' mode, and for what start/end tags.
    304 my $Delta_Mode = 0;
    305 my $Delta_From = '';
    306 my $Delta_To = '';
    307 
    308 my $TestCode;
    309 
    310 # Whether to parse filenames from the RCS filename, and if so what
    311 # prefix to strip.
    312 my $RCS_Root;
    313 
    314 # Whether to output information on the # of lines added and removed
    315 # by each file modification.
    316 my $Show_Lines_Modified = 0;
    317 
    318 ## end vars set by options.
    319 
    320 # latest observed times for the start/end tags in delta mode
    321 my $Delta_StartTime = 0;
    322 my $Delta_EndTime = 0;
    323 
    324 my $No_Ancestors = 0;
    325 
    326 my $No_Extra_Indent = 0;
    327 
    328 my $GroupByDate = 0;
    329 my $GroupByAuthor = 0;
    330 
    331 # ----------------------------------------------------------------------------
    332 
    333 package CVS::Utils::ChangeLog::EntrySet;
    334 
    335 sub new {
    336   my $class = shift;
    337   my %self;
    338   bless \%self, $class;
    339 }
    340 
    341 # -------------------------------------
    342 
    343 sub output_changelog {
    344   my $output_type = $XML_Output ? 'XML' : 'Text';
    345   my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
    346   my $output = $output_class->new(follow_branches => \@Follow_Branches,
    347                                   follow_only     => \@Follow_Only,
    348                                   ignore_tags     => \%ignore_tags,
    349                                   show_tags       => \%show_tags,
    350                                  );
    351   $output->output_changelog(@_);
    352 }
    353 
    354 # -------------------------------------
    355 
    356 sub add_fileentry {
    357   my ($self, $file_full_path, $time, $revision, $state, $lines,
    358       $branch_names, $branch_roots, $branch_numbers,
    359       $symbolic_names, $author, $msg_txt) = @_;
    360 
    361       my $qunk =
    362         CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision,
    363                                               $state, $lines,
    364                                               $branch_names, $branch_roots,
    365                                               $branch_numbers,
    366                                               $symbolic_names);
    367 
    368       # We might be including revision numbers and/or tags and/or
    369       # branch names in the output.  Most of the code from here to
    370       # loop-end deals with organizing these in qunk.
    371 
    372       unless ( $Hide_Branch_Additions
    373                and
    374                $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
    375         # Add this file to the list
    376         # (We use many spoonfuls of autovivication magic. Hashes and arrays
    377         # will spring into existence if they aren't there already.)
    378 
    379         &main::debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n");
    380 
    381         # Store with the files in this commit.  Later we'll loop through
    382         # again, making sure that revisions with the same log message
    383         # and nearby commit times are grouped together as one commit.
    384         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt} =
    385           CVS::Utils::ChangeLog::Message->new($msg_txt)
    386               unless exists $self->{$qunk->dir_key}{$author}{$time}{$msg_txt};
    387         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk);
    388       }
    389 
    390 }
    391 
    392 # ----------------------------------------------------------------------------
    393 
    394 package CVS::Utils::ChangeLog::EntrySet::Output::Text;
    395 
    396 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
    397 
    398 use File::Basename qw( fileparse );
    399 
    400 sub new {
    401   my $class = shift;
    402   my $self = $class->SUPER::new(@_);
    403 }
    404 
    405 # -------------------------------------
    406 
    407 sub wday {
    408   my $self = shift; my $class = ref $self;
    409   my ($wday) = @_;
    410 
    411   return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
    412 }
    413 
    414 # -------------------------------------
    415 
    416 sub header_line {
    417   my $self = shift;
    418   my ($time, $author, $lastdate, $lastauthor) = @_;
    419 
    420   my $header_line = '';
    421 
    422   my (undef,$min,$hour,$mday,$mon,$year,$wday)
    423     = $UTC_Times ? gmtime($time) : localtime($time);
    424 
    425   my $date = $self->fdatetime($time);
    426 
    427   if ($Show_Times) {
    428     $header_line = sprintf "%s  %s\n\n", $date, $author;
    429   } else {
    430     if ( $GroupByDate and ($date eq $lastdate) and
    431          ((!$GroupByAuthor) or ($author eq $lastauthor)) ) {
    432       $header_line = '';
    433     } else {
    434       if ( $GroupByDate and ! $GroupByAuthor ) {
    435         $header_line = "$date\n\n";
    436       } else {
    437         $header_line = "$date  $author\n\n";
    438       }
    439     }
    440   }
    441 }
    442 
    443 # -------------------------------------
    444 
    445 sub preprocess_msg_text {
    446   my $self = shift;
    447   my ($text) = @_;
    448 
    449   $text = $self->SUPER::preprocess_msg_text($text);
    450 
    451   unless ( $No_Wrap ) {
    452     # Strip off lone newlines, but only for lines that don't begin with
    453     # whitespace or a mail-quoting character, since we want to preserve
    454     # that kind of formatting.  Also don't strip newlines that follow a
    455     # period; we handle those specially next.  And don't strip
    456     # newlines that precede an open paren.
    457     1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
    458 
    459     # If a newline follows a period, make sure that when we bring up the
    460     # bottom sentence, it begins with two spaces.
    461     1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g;
    462   }
    463 
    464   return $text;
    465 }
    466 
    467 # -------------------------------------
    468 
    469 # Here we take a bunch of qunks and convert them into printed
    470 # summary that will include all the information the user asked for.
    471 sub pretty_file_list {
    472   my $self = shift;
    473 
    474   return ''
    475     if $Hide_Filenames;
    476 
    477   my $qunksref = shift;
    478 
    479   my @filenames;
    480   my $beauty = '';          # The accumulating header string for this entry.
    481   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
    482   my %unanimous_tags;       # Tags found in all qunks
    483   my %all_branches;         # Branches found in any qunk
    484   my $fbegun = 0;           # Did we begin printing filenames yet?
    485 
    486   my ($common_dir, $qunkrefs) =
    487     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
    488 
    489   my @qunkrefs = @$qunkrefs;
    490 
    491   # Not XML output, so complexly compactify for chordate consumption.  At this
    492   # point we have enough global information about all the qunks to organize
    493   # them non-redundantly for output.
    494 
    495   if ($common_dir) {
    496     # Note that $common_dir still has its trailing slash
    497     $beauty .= "$common_dir: ";
    498   }
    499 
    500   if ($Show_Branches)
    501   {
    502     # For trailing revision numbers.
    503     my @brevisions;
    504 
    505     foreach my $branch (keys (%all_branches))
    506     {
    507       foreach my $qunkref (@qunkrefs)
    508       {
    509         if ((defined ($qunkref->branch))
    510             and ($qunkref->branch eq $branch))
    511         {
    512           if ($fbegun) {
    513             # kff todo: comma-delimited in XML too?  Sure.
    514             $beauty .= ", ";
    515           }
    516           else {
    517             $fbegun = 1;
    518           }
    519           my $fname = substr ($qunkref->filename, length ($common_dir));
    520           $beauty .= $fname;
    521           $qunkref->{'printed'} = 1;  # Just setting a mark bit, basically
    522 
    523           if ( $Show_Tags and defined $qunkref->tags ) {
    524             my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
    525 
    526             if (@tags) {
    527               $beauty .= " (tags: ";
    528               $beauty .= join (', ', @tags);
    529               $beauty .= ")";
    530             }
    531           }
    532 
    533           if ($Show_Revisions) {
    534             # Collect the revision numbers' last components, but don't
    535             # print them -- they'll get printed with the branch name
    536             # later.
    537             $qunkref->revision =~ /.+\.([\d]+)$/;
    538             push (@brevisions, $1);
    539 
    540             # todo: we're still collecting branch roots, but we're not
    541             # showing them anywhere.  If we do show them, it would be
    542             # nifty to just call them revision "0" on a the branch.
    543             # Yeah, that's the ticket.
    544           }
    545         }
    546       }
    547       $beauty .= " ($branch";
    548       if (@brevisions) {
    549         if ((scalar (@brevisions)) > 1) {
    550           $beauty .= ".[";
    551           $beauty .= (join (',', @brevisions));
    552           $beauty .= "]";
    553         }
    554         else {
    555           # Square brackets are spurious here, since there's no range to
    556           # encapsulate
    557           $beauty .= ".$brevisions[0]";
    558         }
    559       }
    560       $beauty .= ")";
    561     }
    562   }
    563 
    564   # Okay; any qunks that were done according to branch are taken care
    565   # of, and marked as printed.  Now print everyone else.
    566 
    567   my %fileinfo_printed;
    568   foreach my $qunkref (@qunkrefs)
    569   {
    570     next if (defined ($qunkref->{'printed'}));   # skip if already printed
    571 
    572     my $b = substr ($qunkref->filename, length ($common_dir));
    573     # todo: Shlomo's change was this:
    574     # $beauty .= substr ($qunkref->filename,
    575     #              (($common_dir eq "./") ? '' : length ($common_dir)));
    576     $qunkref->{'printed'} = 1;  # Set a mark bit.
    577 
    578     if ($Show_Revisions || $Show_Tags || $Show_Dead)
    579     {
    580       my $started_addendum = 0;
    581 
    582       if ($Show_Revisions) {
    583         $started_addendum = 1;
    584         $b .= " (";
    585         $b .= $qunkref->revision;
    586       }
    587       if ($Show_Dead && $qunkref->state =~ /dead/)
    588       {
    589         # Deliberately not using $started_addendum. Keeping it simple.
    590         $b .= "[DEAD]";
    591       }
    592       if ($Show_Tags && (defined $qunkref->tags)) {
    593         my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
    594         if ((scalar (@tags)) > 0) {
    595           if ($started_addendum) {
    596             $b .= ", ";
    597           }
    598           else {
    599             $b .= " (tags: ";
    600           }
    601           $b .= join (', ', @tags);
    602           $started_addendum = 1;
    603         }
    604       }
    605       if ($started_addendum) {
    606         $b .= ")";
    607       }
    608     }
    609 
    610     unless ( exists $fileinfo_printed{$b} ) {
    611       if ($fbegun) {
    612         $beauty .= ", ";
    613       } else {
    614         $fbegun = 1;
    615       }
    616       $beauty .= $b, $fileinfo_printed{$b} = 1;
    617     }
    618   }
    619 
    620   # Unanimous tags always come last.
    621   if ($Show_Tags && %unanimous_tags)
    622   {
    623     $beauty .= " (utags: ";
    624     $beauty .= join (', ', sort keys (%unanimous_tags));
    625     $beauty .= ")";
    626   }
    627 
    628   # todo: still have to take care of branch_roots?
    629 
    630   $beauty = "$beauty:";
    631 
    632   return $beauty;
    633 }
    634 
    635 # -------------------------------------
    636 
    637 sub output_tagdate {
    638   my $self = shift;
    639   my ($fh, $time, $tag) = @_;
    640 
    641   my $fdatetime = $self->fdatetime($time);
    642   print $fh "$fdatetime  tag $tag\n\n";
    643   return;
    644 }
    645 
    646 # -------------------------------------
    647 
    648 sub format_body {
    649   my $self = shift;
    650   my ($msg, $files, $qunklist) = @_;
    651 
    652   my $body;
    653 
    654   if ( $No_Wrap and ! $Summary ) {
    655     $msg = $self->preprocess_msg_text($msg);
    656     $files = $self->mywrap("\t", "\t  ", "* $files");
    657     $msg =~ s/\n(.+)/\n$Indent$1/g;
    658     unless ($After_Header eq " ") {
    659       $msg =~ s/^(.+)/$Indent$1/g;
    660     }
    661     if ( $Hide_Filenames ) {
    662       $body = $After_Header . $msg;
    663     } else {
    664       $body = $files . $After_Header . $msg;
    665     }
    666   } elsif ( $Summary ) {
    667     my ($filelist, $qunk);
    668     my (@DeletedQunks, @AddedQunks, @ChangedQunks);
    669 
    670     $msg = $self->preprocess_msg_text($msg);
    671     #
    672     #     Sort the files (qunks) according to the operation that was
    673     # performed.  Files which were added have no line change
    674     # indicator, whereas deleted files have state dead.
    675     #
    676     foreach $qunk ( @$qunklist ) {
    677       if ( "dead" eq $qunk->state) {
    678         push @DeletedQunks, $qunk;
    679       } elsif ( ! defined $qunk->lines ) {
    680         push @AddedQunks, $qunk;
    681       } else {
    682         push @ChangedQunks, $qunk;
    683       }
    684     }
    685     #
    686     #     The qunks list was  originally in tree search order.  Let's
    687     # get that back.  The lists, if they exist, will be reversed upon
    688     # processing.
    689     #
    690 
    691     #
    692     #     Now write the three sections onto $filelist
    693     #
    694     if ( @DeletedQunks ) {
    695       $filelist .= "\tDeleted:\n";
    696       foreach $qunk ( @DeletedQunks ) {
    697         $filelist .= "\t\t" . $qunk->filename;
    698         $filelist .= " (" . $qunk->revision . ")";
    699         $filelist .= "\n";
    700       }
    701       undef @DeletedQunks;
    702     }
    703 
    704     if ( @AddedQunks ) {
    705       $filelist .= "\tAdded:\n";
    706       foreach $qunk (@AddedQunks) {
    707         $filelist .= "\t\t" . $qunk->filename;
    708         $filelist .= " (" . $qunk->revision . ")";
    709         $filelist .= "\n";
    710       }
    711       undef @AddedQunks ;
    712     }
    713 
    714     if ( @ChangedQunks ) {
    715       $filelist .= "\tChanged:\n";
    716       foreach $qunk (@ChangedQunks) {
    717         $filelist .= "\t\t" . $qunk->filename;
    718         $filelist .= " (" . $qunk->revision . ")";
    719         $filelist .= ", \"" . $qunk->state . "\"";
    720         $filelist .= ", lines: " . $qunk->lines;
    721         $filelist .= "\n";
    722       }
    723       undef @ChangedQunks;
    724     }
    725 
    726     chomp $filelist;
    727 
    728     if ( $Hide_Filenames ) {
    729       $filelist = '';
    730     }
    731 
    732     $msg =~ s/\n(.*)/\n$Indent$1/g;
    733     unless ( $After_Header eq " " or $FSF_Style ) {
    734       $msg =~ s/^(.*)/$Indent$1/g;
    735     }
    736 
    737     unless ( $No_Wrap ) {
    738       if ( $FSF_Style ) {
    739         $msg = $self->wrap_log_entry($msg, '', 69, 69);
    740         chomp($msg);
    741         chomp($msg);
    742       } else {
    743         $msg = $self->mywrap('', $Indent, "$msg");
    744         $msg =~ s/[ \t]+\n/\n/g;
    745       }
    746     }
    747 
    748     $body = $filelist . $After_Header . $msg;
    749   } else {  # do wrapping, either FSF-style or regular
    750     my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent  ";
    751 
    752     if ( $FSF_Style ) {
    753       $files = $self->mywrap($Indent, $latter_wrap, "* $files");
    754 
    755       my $files_last_line_len = 0;
    756       if ( $After_Header eq " " ) {
    757         $files_last_line_len = $self->last_line_len($files);
    758         $files_last_line_len += 1;  # for $After_Header
    759       }
    760 
    761       $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
    762       $body = $files . $After_Header . $msg;
    763     } else {  # not FSF-style
    764       $msg = $self->preprocess_msg_text($msg);
    765       $body = $files . $After_Header . $msg;
    766       $body = $self->mywrap($Indent, $latter_wrap, "* $body");
    767       $body =~ s/[ \t]+\n/\n/g;
    768     }
    769   }
    770 
    771   return $body;
    772 }
    773 
    774 # ----------------------------------------------------------------------------
    775 
    776 package CVS::Utils::ChangeLog::EntrySet::Output::XML;
    777 
    778 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
    779 
    780 use File::Basename qw( fileparse );
    781 
    782 sub new {
    783   my $class = shift;
    784   my $self = $class->SUPER::new(@_);
    785 }
    786 
    787 # -------------------------------------
    788 
    789 sub header_line {
    790   my $self = shift;
    791   my ($time, $author, $lastdate) = @_;
    792 
    793   my $header_line = '';
    794 
    795   my $isoDate;
    796 
    797   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
    798 
    799   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
    800   $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
    801                      $y + 1900, $m + 1, $d, $H, $M, $S);
    802 
    803   my (undef,$min,$hour,$mday,$mon,$year,$wday)
    804     = $UTC_Times ? gmtime($time) : localtime($time);
    805 
    806   my $date = $self->fdatetime($time);
    807   $wday = $self->wday($wday);
    808 
    809   $header_line =
    810     sprintf ("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
    811              $year+1900, $mon+1, $mday, $hour, $min);
    812   $header_line .= "<isoDate>$isoDate</isoDate>\n"
    813     unless $No_XML_ISO_Date;
    814   $header_line .= sprintf("<author>%s</author>\n" , $author);
    815 }
    816 
    817 # -------------------------------------
    818 
    819 sub wday {
    820   my $self = shift; my $class = ref $self;
    821   my ($wday) = @_;
    822 
    823   return '<weekday>' . $class->weekday_en($wday) . "</weekday>\n";
    824 }
    825 
    826 # -------------------------------------
    827 
    828 sub escape {
    829   my $self = shift;
    830 
    831   my $txt = shift;
    832   $txt =~ s/&/&amp;/g;
    833   $txt =~ s/</&lt;/g;
    834   $txt =~ s/>/&gt;/g;
    835   return $txt;
    836 }
    837 
    838 # -------------------------------------
    839 
    840 sub output_header {
    841   my $self = shift;
    842   my ($fh) = @_;
    843 
    844   my $encoding    =
    845     length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
    846   my $version     = 'version="1.0"';
    847   my $declaration =
    848     sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
    849   my $root        =
    850     $No_XML_Namespace ?
    851       '<changelog>'     :
    852         '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
    853   print $fh "$declaration\n\n$root\n\n";
    854 }
    855 
    856 # -------------------------------------
    857 
    858 sub output_footer {
    859   my $self = shift;
    860   my ($fh) = @_;
    861 
    862   print $fh "</changelog>\n";
    863 }
    864 
    865 # -------------------------------------
    866 
    867 sub preprocess_msg_text {
    868   my $self = shift;
    869   my ($text) = @_;
    870 
    871   $text = $self->SUPER::preprocess_msg_text($text);
    872 
    873   $text = $self->escape($text);
    874   chomp $text;
    875   $text = "<msg>${text}</msg>\n";
    876 
    877   return $text;
    878 }
    879 
    880 # -------------------------------------
    881 
    882 # Here we take a bunch of qunks and convert them into a printed
    883 # summary that will include all the information the user asked for.
    884 sub pretty_file_list {
    885   my $self = shift;
    886   my ($qunksref) = @_;
    887 
    888   my $beauty = '';          # The accumulating header string for this entry.
    889   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
    890   my %unanimous_tags;       # Tags found in all qunks
    891   my %all_branches;         # Branches found in any qunk
    892   my $fbegun = 0;           # Did we begin printing filenames yet?
    893 
    894   my ($common_dir, $qunkrefs) =
    895     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches),
    896       $qunksref);
    897 
    898   my @qunkrefs = @$qunkrefs;
    899 
    900   # If outputting XML, then our task is pretty simple, because we
    901   # don't have to detect common dir, common tags, branch prefixing,
    902   # etc.  We just output exactly what we have, and don't worry about
    903   # redundancy or readability.
    904 
    905   foreach my $qunkref (@qunkrefs)
    906   {
    907     my $filename    = $qunkref->filename;
    908     my $state       = $qunkref->state;
    909     my $revision    = $qunkref->revision;
    910     my $tags        = $qunkref->tags;
    911     my $branch      = $qunkref->branch;
    912     my $branchroots = $qunkref->roots;
    913     my $lines       = $qunkref->lines;
    914 
    915     $filename = $self->escape($filename);   # probably paranoia
    916     $revision = $self->escape($revision);   # definitely paranoia
    917 
    918     $beauty .= "<file>\n";
    919     $beauty .= "<name>${filename}</name>\n";
    920     $beauty .= "<cvsstate>${state}</cvsstate>\n";
    921     $beauty .= "<revision>${revision}</revision>\n";
    922 
    923     if ($Show_Lines_Modified
    924         && $lines && $lines =~ m/\+(\d+)\s+-(\d+)/) {
    925         $beauty .= "<linesadded>$1</linesadded>\n";
    926         $beauty .= "<linesremoved>$2</linesremoved>\n";
    927     }
    928 
    929     if ($branch) {
    930       $branch   = $self->escape($branch);     # more paranoia
    931       $beauty .= "<branch>${branch}</branch>\n";
    932     }
    933     foreach my $tag (@$tags) {
    934       $tag = $self->escape($tag);  # by now you're used to the paranoia
    935       $beauty .= "<tag>${tag}</tag>\n";
    936     }
    937     foreach my $root (@$branchroots) {
    938       $root = $self->escape($root);  # which is good, because it will continue
    939       $beauty .= "<branchroot>${root}</branchroot>\n";
    940     }
    941     $beauty .= "</file>\n";
    942   }
    943 
    944   # Theoretically, we could go home now.  But as long as we're here,
    945   # let's print out the common_dir and utags, as a convenience to
    946   # the receiver (after all, earlier code calculated that stuff
    947   # anyway, so we might as well take advantage of it).
    948 
    949   if ((scalar (keys (%unanimous_tags))) > 1) {
    950     foreach my $utag ((keys (%unanimous_tags))) {
    951       $utag = $self->escape($utag);   # the usual paranoia
    952       $beauty .= "<utag>${utag}</utag>\n";
    953     }
    954   }
    955   if ($common_dir) {
    956     $common_dir = $self->escape($common_dir);
    957     $beauty .= "<commondir>${common_dir}</commondir>\n";
    958   }
    959 
    960   # That's enough for XML, time to go home:
    961   return $beauty;
    962 }
    963 
    964 # -------------------------------------
    965 
    966 sub output_tagdate {
    967   my $self = shift;
    968   my ($fh, $time, $tag) = @_;
    969 
    970   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
    971 
    972   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
    973   my $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
    974                        $y + 1900, $m + 1, $d, $H, $M, $S);
    975 
    976   print $fh "<tagdate>\n";
    977   print $fh "<tagisodate>$isoDate</tagisodate>\n";
    978   print $fh "<tagdatetag>$tag</tagdatetag>\n";
    979   print $fh "</tagdate>\n\n";
    980   return;
    981 }
    982 
    983 # -------------------------------------
    984 
    985 sub output_entry {
    986   my $self = shift;
    987   my ($fh, $entry) = @_;
    988   print $fh "<entry>\n$entry</entry>\n\n";
    989 }
    990 
    991 # -------------------------------------
    992 
    993 sub format_body {
    994   my $self = shift;
    995   my ($msg, $files, $qunklist) = @_;
    996 
    997   $msg = $self->preprocess_msg_text($msg);
    998   return $files . $msg;
    999 }
   1000 
   1001 # ----------------------------------------------------------------------------
   1002 
   1003 package CVS::Utils::ChangeLog::EntrySet::Output;
   1004 
   1005 use Carp           qw( croak );
   1006 use File::Basename qw( fileparse );
   1007 
   1008 # Class Utility Functions -------------
   1009 
   1010 { # form closure
   1011 
   1012 my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday));
   1013 sub weekday_en {
   1014   my $class = shift;
   1015   return $weekdays[$_[0]];
   1016 }
   1017 
   1018 }
   1019 
   1020 # -------------------------------------
   1021 
   1022 sub new {
   1023   my ($proto, %args) = @_;
   1024   my $class = ref $proto || $proto;
   1025 
   1026   my $follow_branches = delete $args{follow_branches};
   1027   my $follow_only     = delete $args{follow_only};
   1028   my $ignore_tags     = delete $args{ignore_tags};
   1029   my $show_tags       = delete $args{show_tags};
   1030   die "Unrecognized arg to EntrySet::Output::new: '$_'\n"
   1031     for keys %args;
   1032 
   1033   bless +{follow_branches => $follow_branches,
   1034           follow_only     => $follow_only,
   1035           show_tags       => $show_tags,
   1036           ignore_tags     => $ignore_tags,
   1037          }, $class;
   1038 }
   1039 
   1040 # Abstract Subrs ----------------------
   1041 
   1042 sub wday               { croak "Whoops.  Abtract method call (wday).\n" }
   1043 sub pretty_file_list   { croak "Whoops.  Abtract method call (pretty_file_list).\n" }
   1044 sub output_tagdate     { croak "Whoops.  Abtract method call (output_tagdate).\n" }
   1045 sub header_line        { croak "Whoops.  Abtract method call (header_line).\n" }
   1046 
   1047 # Instance Subrs ----------------------
   1048 
   1049 sub output_header { }
   1050 
   1051 # -------------------------------------
   1052 
   1053 sub output_entry {
   1054   my $self = shift;
   1055   my ($fh, $entry) = @_;
   1056   print $fh "$entry\n";
   1057 }
   1058 
   1059 # -------------------------------------
   1060 
   1061 sub output_footer { }
   1062 
   1063 # -------------------------------------
   1064 
   1065 sub escape { return $_[1] }
   1066 
   1067 # -------------------------------------
   1068 
   1069 sub _revision_is_wanted {
   1070   my ($self, $qunk) = @_;
   1071 
   1072   my ($revision, $branch_numbers) = @{$qunk}{qw( revision branch_numbers )};
   1073   my $follow_branches = $self->{follow_branches};
   1074   my $follow_only     = $self->{follow_only};
   1075 
   1076   for my $ignore_tag (keys %{$self->{ignore_tags}}) {
   1077     return
   1078       if defined $qunk->{tags} and grep $_ eq $ignore_tag, @{$qunk->{tags}};
   1079   }
   1080 
   1081   if ( keys %{$self->{show_tags}} ) {
   1082     for my $show_tag (keys %{$self->{show_tags}}) {
   1083       return
   1084         if ! defined $qunk->{tags} or ! grep $_ eq $show_tag, @{$qunk->{tags}};
   1085     }
   1086   }
   1087 
   1088   return 1
   1089     unless @$follow_branches + @$follow_only; # no follow is follow all
   1090 
   1091   for my $x (map([$_, 1], @$follow_branches),
   1092              map([$_, 0], @$follow_only    )) {
   1093     my ($branch, $followsub) = @$x;
   1094 
   1095     # Special case for following trunk revisions
   1096     return 1
   1097       if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;
   1098 
   1099     if ( my $branch_number = $branch_numbers->{$branch} ) {
   1100       # Are we on one of the follow branches or an ancestor of same?
   1101 
   1102       # If this revision is a prefix of the branch number, or possibly is less
   1103       # in the minormost number, OR if this branch number is a prefix of the
   1104       # revision, then yes.  Otherwise, no.
   1105 
   1106       # So below, we determine if any of those conditions are met.
   1107 
   1108       # Trivial case: is this revision on the branch?  (Compare this way to
   1109       # avoid regexps that screw up Emacs indentation, argh.)
   1110       if ( substr($revision, 0, (length($branch_number) + 1))
   1111            eq
   1112            ($branch_number . ".") ) {
   1113         if ( $followsub ) {
   1114           return 1;
   1115 #        } elsif ( length($revision) == length($branch_number)+2 ) {
   1116         } elsif ( substr($revision, length($branch_number)+1) =~ /^\d+$/ ) {
   1117           return 1;
   1118         }
   1119       } elsif ( length($branch_number) > length($revision)
   1120                 and
   1121                 ! $No_Ancestors ) {
   1122         # Non-trivial case: check if rev is ancestral to branch
   1123 
   1124         # r_left still has the trailing "."
   1125         my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);
   1126 
   1127         # b_left still has trailing "."
   1128         # b_mid has no trailing "."
   1129         my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);
   1130         return 1
   1131           if $r_left eq $b_left and $r_end <= $b_mid;
   1132       }
   1133     }
   1134   }
   1135 
   1136   return;
   1137 }
   1138 
   1139 # -------------------------------------
   1140 
   1141 sub output_changelog {
   1142 my $self = shift; my $class = ref $self;
   1143   my ($grand_poobah) = @_;
   1144   ### Process each ChangeLog
   1145 
   1146   while (my ($dir,$authorhash) = each %$grand_poobah)
   1147   {
   1148     &main::debug ("DOING DIR: $dir\n");
   1149 
   1150     # Here we twist our hash around, from being
   1151     #   author => time => message => filelist
   1152     # in %$authorhash to
   1153     #   time => author => message => filelist
   1154     # in %changelog.
   1155     #
   1156     # This is also where we merge entries.  The algorithm proceeds
   1157     # through the timeline of the changelog with a sliding window of
   1158     # $Max_Checkin_Duration seconds; within that window, entries that
   1159     # have the same log message are merged.
   1160     #
   1161     # (To save space, we zap %$authorhash after we've copied
   1162     # everything out of it.)
   1163 
   1164     my %changelog;
   1165     while (my ($author,$timehash) = each %$authorhash)
   1166     {
   1167       my %stamptime;
   1168       foreach my $time (sort {$a <=> $b} (keys %$timehash))
   1169       {
   1170         my $msghash = $timehash->{$time};
   1171         while (my ($msg,$qunklist) = each %$msghash)
   1172         {
   1173           my $stamptime = $stamptime{$msg};
   1174           if ((defined $stamptime)
   1175               and (($time - $stamptime) < $Max_Checkin_Duration)
   1176               and (defined $changelog{$stamptime}{$author}{$msg}))
   1177           {
   1178             push(@{$changelog{$stamptime}{$author}{$msg}}, $qunklist->files);
   1179           }
   1180           else {
   1181             $changelog{$time}{$author}{$msg} = $qunklist->files;
   1182             $stamptime{$msg} = $time;
   1183           }
   1184         }
   1185       }
   1186     }
   1187     undef (%$authorhash);
   1188 
   1189     ### Now we can write out the ChangeLog!
   1190 
   1191     my ($logfile_here, $logfile_bak, $tmpfile);
   1192     my $lastdate = "";
   1193     my $lastauthor = "";
   1194 
   1195     if (! $Output_To_Stdout) {
   1196       $logfile_here =  $dir . $Log_File_Name;
   1197       if (!$^O =~ /Win32/i) {
   1198         $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
   1199       }
   1200       else {
   1201         $logfile_here =~ s/^\.\/+//;      # remove any leading "./"
   1202       }
   1203       $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
   1204       $logfile_bak  = "${logfile_here}.bak";
   1205 
   1206       open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
   1207     }
   1208     else {
   1209       open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
   1210     }
   1211 
   1212     print LOG_OUT $ChangeLog_Header;
   1213 
   1214     my %tag_date_printed;
   1215 
   1216     $self->output_header(\*LOG_OUT);
   1217 
   1218     my @key_list = ();
   1219     if($Chronological_Order) {
   1220         @key_list = sort {$a <=> $b} (keys %changelog);
   1221     } else {
   1222         @key_list = sort {$b <=> $a} (keys %changelog);
   1223     }
   1224     
   1225     if ( $Show_Tag_Dates || $XML_Output ) {
   1226       foreach my $time (@key_list) {
   1227         my $authorhash = $changelog{$time};
   1228         while (my ($author,$mesghash) = each %$authorhash) {
   1229           while (my ($msg,$qunk) = each %$mesghash) {
   1230             my $qunklist = $mesghash->{$msg};
   1231             for my $qunkref2 (@$qunklist) {
   1232               if (!$self->_revision_is_wanted($qunkref2)) {
   1233                 if (defined ($qunkref2->tags)) {
   1234                   for my $tag (@{$qunkref2->tags}) {
   1235                     $tag_date_printed{$tag} = 1;
   1236                   }
   1237                 }
   1238               }
   1239             }
   1240           }
   1241         }
   1242       }
   1243     }
   1244     
   1245     foreach my $time (@key_list)
   1246     {
   1247       next if ($Delta_Mode &&
   1248                (($time <= $Delta_StartTime) ||
   1249                 ($time > $Delta_EndTime && $Delta_EndTime)));
   1250 
   1251       # Set up the date/author line.
   1252       # kff todo: do some more XML munging here, on the header
   1253       # part of the entry:
   1254       my (undef,$min,$hour,$mday,$mon,$year,$wday)
   1255           = $UTC_Times ? gmtime($time) : localtime($time);
   1256 
   1257       $wday = $self->wday($wday);
   1258       # XML output includes everything else, we might as well make
   1259       # it always include Day Of Week too, for consistency.
   1260       my $authorhash = $changelog{$time};
   1261       if ( $Show_Tag_Dates || $XML_Output ) {
   1262         my %tags;
   1263         while (my ($author,$mesghash) = each %$authorhash) {
   1264           while (my ($msg,$qunk) = each %$mesghash) {
   1265             for my $qunkref2 (@$qunk) {
   1266               if (defined ($qunkref2->tags)) {
   1267                 for my $tag (@{$qunkref2->tags}) {
   1268                   $tags{$tag} = 1;
   1269                 }
   1270               }
   1271             }
   1272           }
   1273         }
   1274         # Sort here for determinism to ease testing
   1275         foreach my $tag (sort keys %tags) {
   1276           if ( ! defined $tag_date_printed{$tag} ) {
   1277             $tag_date_printed{$tag} = $time;
   1278             if ( (! defined $Regexp_Tag) or ( $tag =~ /$Regexp_Tag/ ) ) {
   1279               $self->output_tagdate(\*LOG_OUT, $time, $tag);
   1280               $lastauthor = ""
   1281             }
   1282           }
   1283         }
   1284       }
   1285       while (my ($author,$mesghash) = each %$authorhash)
   1286       {
   1287         # If XML, escape in outer loop to avoid compound quoting:
   1288         $author = $self->escape($author);
   1289 
   1290       FOOBIE:
   1291         # We sort here to enable predictable ordering for the testing porpoises
   1292         for my $msg (sort keys %$mesghash)
   1293         {
   1294           my $qunklist = $mesghash->{$msg};
   1295 
   1296           my @qunklist =
   1297             grep $self->_revision_is_wanted($_), @$qunklist;
   1298 
   1299           next FOOBIE unless @qunklist;
   1300 
   1301           my $files               = $self->pretty_file_list(\@qunklist);
   1302           my $header_line;          # date and author
   1303           my $wholething;           # $header_line + $body
   1304 
   1305           my $date = $self->fdatetime($time);
   1306           $header_line = $self->header_line($time, $author, $lastdate, $lastauthor);
   1307           $lastdate = $date;
   1308           $lastauthor = $author;
   1309 
   1310           $Text::Wrap::huge = 'overflow'
   1311             if $Text::Wrap::VERSION >= 2001.0130;
   1312           # Reshape the body according to user preferences.
   1313           my $body = $self->format_body($msg, $files, \@qunklist);
   1314 
   1315           $body =~ s/[ \t]+\n/\n/g;
   1316           $wholething = $header_line . $body;
   1317 
   1318           # One last check: make sure it passes the regexp test, if the
   1319           # user asked for that.  We have to do it here, so that the
   1320           # test can match against information in the header as well
   1321           # as in the text of the log message.
   1322 
   1323           # How annoying to duplicate so much code just because I
   1324           # can't figure out a way to evaluate scalars on the trailing
   1325           # operator portion of a regular expression.  Grrr.
   1326           if ($Case_Insensitive) {
   1327             unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/oi ) ) {
   1328               $self->output_entry(\*LOG_OUT, $wholething);
   1329             }
   1330           }
   1331           else {
   1332             unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/o ) ) {
   1333               $self->output_entry(\*LOG_OUT, $wholething);
   1334             }
   1335           }
   1336         }
   1337       }
   1338     }
   1339 
   1340     $self->output_footer(\*LOG_OUT);
   1341 
   1342     close (LOG_OUT);
   1343 
   1344     if ( ! $Output_To_Stdout ) {
   1345       # If accumulating, append old data to new before renaming.  But
   1346       # don't append the most recent entry, since it's already in the
   1347       # new log due to CVS's idiosyncratic interpretation of "log -d".
   1348       if ($Cumulative && -f $logfile_here) {
   1349         open NEW_LOG, ">>$tmpfile"
   1350           or die "trouble appending to $tmpfile ($!)";
   1351 
   1352         open OLD_LOG, "<$logfile_here"
   1353           or die "trouble reading from $logfile_here ($!)";
   1354 
   1355         my $started_first_entry = 0;
   1356         my $passed_first_entry = 0;
   1357         while (<OLD_LOG>) {
   1358           if ( ! $passed_first_entry ) {
   1359             if ( ( ! $started_first_entry )
   1360                 and /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
   1361               $started_first_entry = 1;
   1362             } elsif ( /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
   1363               $passed_first_entry = 1;
   1364               print NEW_LOG $_;
   1365             }
   1366           } else {
   1367             print NEW_LOG $_;
   1368           }
   1369         }
   1370 
   1371         close NEW_LOG;
   1372         close OLD_LOG;
   1373       }
   1374 
   1375       if ( -f $logfile_here ) {
   1376         rename $logfile_here, $logfile_bak;
   1377       }
   1378       rename $tmpfile, $logfile_here;
   1379     }
   1380   }
   1381 }
   1382 
   1383 # -------------------------------------
   1384 
   1385 # Don't call this wrap, because with 5.5.3, that clashes with the
   1386 # (unconditional :-( ) export of wrap() from Text::Wrap
   1387 sub mywrap {
   1388   my $self = shift;
   1389   my ($indent1, $indent2, @text) = @_;
   1390   # If incoming text looks preformatted, don't get clever
   1391   my $text = Text::Wrap::wrap($indent1, $indent2, @text);
   1392   if ( grep /^\s+/m, @text ) {
   1393     return $text;
   1394   }
   1395   my @lines = split /\n/, $text;
   1396   $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
   1397   $lines[0] =~ s/^$indent1\s+/$indent1/;
   1398   s/^$indent2\s+/$indent2/
   1399     for @lines[1..$#lines];
   1400   my $newtext = join "\n", @lines;
   1401   $newtext .= "\n"
   1402     if substr($text, -1) eq "\n";
   1403   return $newtext;
   1404 }
   1405 
   1406 # -------------------------------------
   1407 
   1408 sub preprocess_msg_text {
   1409   my $self = shift;
   1410   my ($text) = @_;
   1411 
   1412   # Strip out carriage returns (as they probably result from DOSsy editors).
   1413   $text =~ s/\r\n/\n/g;
   1414   # If it *looks* like two newlines, make it *be* two newlines:
   1415   $text =~ s/\n\s*\n/\n\n/g;
   1416 
   1417   return $text;
   1418 }
   1419 
   1420 # -------------------------------------
   1421 
   1422 sub last_line_len {
   1423   my $self = shift;
   1424 
   1425   my $files_list = shift;
   1426   my @lines = split (/\n/, $files_list);
   1427   my $last_line = pop (@lines);
   1428   return length ($last_line);
   1429 }
   1430 
   1431 # -------------------------------------
   1432 
   1433 # A custom wrap function, sensitive to some common constructs used in
   1434 # log entries.
   1435 sub wrap_log_entry {
   1436   my $self = shift;
   1437 
   1438   my $text = shift;                  # The text to wrap.
   1439   my $left_pad_str = shift;          # String to pad with on the left.
   1440 
   1441   # These do NOT take left_pad_str into account:
   1442   my $length_remaining = shift;      # Amount left on current line.
   1443   my $max_line_length  = shift;      # Amount left for a blank line.
   1444 
   1445   my $wrapped_text = '';             # The accumulating wrapped entry.
   1446   my $user_indent = '';              # Inherited user_indent from prev line.
   1447 
   1448   my $first_time = 1;                # First iteration of the loop?
   1449   my $suppress_line_start_match = 0; # Set to disable line start checks.
   1450 
   1451   my @lines = split (/\n/, $text);
   1452   while (@lines)   # Don't use `foreach' here, it won't work.
   1453   {
   1454     my $this_line = shift (@lines);
   1455     chomp $this_line;
   1456 
   1457     if ($this_line =~ /^(\s+)/) {
   1458       $user_indent = $1;
   1459     }
   1460     else {
   1461       $user_indent = '';
   1462     }
   1463 
   1464     # If it matches any of the line-start regexps, print a newline now...
   1465     if ($suppress_line_start_match)
   1466     {
   1467       $suppress_line_start_match = 0;
   1468     }
   1469     elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
   1470            || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
   1471            || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
   1472            || ($this_line =~ /^(\s+)(\S+)/)
   1473            || ($this_line =~ /^(\s*)- +/)
   1474            || ($this_line =~ /^()\s*$/)
   1475            || ($this_line =~ /^(\s*)\*\) +/)
   1476            || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
   1477     {
   1478       # Make a line break immediately, unless header separator is set
   1479       # and this line is the first line in the entry, in which case
   1480       # we're getting the blank line for free already and shouldn't
   1481       # add an extra one.
   1482       unless (($After_Header ne " ") and ($first_time))
   1483       {
   1484         if ($this_line =~ /^()\s*$/) {
   1485           $suppress_line_start_match = 1;
   1486           $wrapped_text .= "\n${left_pad_str}";
   1487         }
   1488 
   1489         $wrapped_text .= "\n${left_pad_str}";
   1490       }
   1491 
   1492       $length_remaining = $max_line_length - (length ($user_indent));
   1493     }
   1494 
   1495     # Now that any user_indent has been preserved, strip off leading
   1496     # whitespace, so up-folding has no ugly side-effects.
   1497     $this_line =~ s/^\s*//;
   1498 
   1499     # Accumulate the line, and adjust parameters for next line.
   1500     my $this_len = length ($this_line);
   1501     if ($this_len == 0)
   1502     {
   1503       # Blank lines should cancel any user_indent level.
   1504       $user_indent = '';
   1505       $length_remaining = $max_line_length;
   1506     }
   1507     elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
   1508     {
   1509       # Walk backwards from the end.  At first acceptable spot, break
   1510       # a new line.
   1511       my $idx = $length_remaining - 1;
   1512       if ($idx < 0) { $idx = 0 };
   1513       while ($idx > 0)
   1514       {
   1515         if (substr ($this_line, $idx, 1) =~ /\s/)
   1516         {
   1517           my $line_now = substr ($this_line, 0, $idx);
   1518           my $next_line = substr ($this_line, $idx);
   1519           $this_line = $line_now;
   1520 
   1521           # Clean whitespace off the end.
   1522           chomp $this_line;
   1523 
   1524           # The current line is ready to be printed.
   1525           $this_line .= "\n${left_pad_str}";
   1526 
   1527           # Make sure the next line is allowed full room.
   1528           $length_remaining = $max_line_length - (length ($user_indent));
   1529 
   1530           # Strip next_line, but then preserve any user_indent.
   1531           $next_line =~ s/^\s*//;
   1532 
   1533           # Sneak a peek at the user_indent of the upcoming line, so
   1534           # $next_line (which will now precede it) can inherit that
   1535           # indent level.  Otherwise, use whatever user_indent level
   1536           # we currently have, which might be none.
   1537           my $next_next_line = shift (@lines);
   1538           if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
   1539             $next_line = $1 . $next_line if (defined ($1));
   1540             # $length_remaining = $max_line_length - (length ($1));
   1541             $next_next_line =~ s/^\s*//;
   1542           }
   1543           else {
   1544             $next_line = $user_indent . $next_line;
   1545           }
   1546           if (defined ($next_next_line)) {
   1547             unshift (@lines, $next_next_line);
   1548           }
   1549           unshift (@lines, $next_line);
   1550 
   1551           # Our new next line might, coincidentally, begin with one of
   1552           # the line-start regexps, so we temporarily turn off
   1553           # sensitivity to that until we're past the line.
   1554           $suppress_line_start_match = 1;
   1555 
   1556           last;
   1557         }
   1558         else
   1559         {
   1560           $idx--;
   1561         }
   1562       }
   1563 
   1564       if ($idx == 0)
   1565       {
   1566         # We bottomed out because the line is longer than the
   1567         # available space.  But that could be because the space is
   1568         # small, or because the line is longer than even the maximum
   1569         # possible space.  Handle both cases below.
   1570 
   1571         if ($length_remaining == ($max_line_length - (length ($user_indent))))
   1572         {
   1573           # The line is simply too long -- there is no hope of ever
   1574           # breaking it nicely, so just insert it verbatim, with
   1575           # appropriate padding.
   1576           $this_line = "\n${left_pad_str}${this_line}";
   1577         }
   1578         else
   1579         {
   1580           # Can't break it here, but may be able to on the next round...
   1581           unshift (@lines, $this_line);
   1582           $length_remaining = $max_line_length - (length ($user_indent));
   1583           $this_line = "\n${left_pad_str}";
   1584         }
   1585       }
   1586     }
   1587     else  # $this_len < $length_remaining, so tack on what we can.
   1588     {
   1589       # Leave a note for the next iteration.
   1590       $length_remaining = $length_remaining - $this_len;
   1591 
   1592       if ($this_line =~ /\.$/)
   1593       {
   1594         $this_line .= "  ";
   1595         $length_remaining -= 2;
   1596       }
   1597       else  # not a sentence end
   1598       {
   1599         $this_line .= " ";
   1600         $length_remaining -= 1;
   1601       }
   1602     }
   1603 
   1604     # Unconditionally indicate that loop has run at least once.
   1605     $first_time = 0;
   1606 
   1607     $wrapped_text .= "${user_indent}${this_line}";
   1608   }
   1609 
   1610   # One last bit of padding.
   1611   $wrapped_text .= "\n";
   1612 
   1613   return $wrapped_text;
   1614 }
   1615 
   1616 # -------------------------------------
   1617 
   1618 sub _pretty_file_list {
   1619   my $self = shift;
   1620 
   1621   my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_;
   1622 
   1623   my @qunkrefs =
   1624     grep +( ( ! $_->tags_exists
   1625               or
   1626               ! grep exists $ignore_tags{$_}, @{$_->tags})
   1627             and
   1628             ( ! keys %show_tags
   1629               or
   1630               ( $_->tags_exists
   1631                 and
   1632                 grep exists $show_tags{$_}, @{$_->tags} )
   1633             )
   1634           ),
   1635     @$qunksref;
   1636 
   1637   my $common_dir;           # Dir prefix common to all files ('' if none)
   1638 
   1639   # First, loop over the qunks gathering all the tag/branch names.
   1640   # We'll put them all in non_unanimous_tags, and take out the
   1641   # unanimous ones later.
   1642  QUNKREF:
   1643   foreach my $qunkref (@qunkrefs)
   1644   {
   1645     # Keep track of whether all the files in this commit were in the
   1646     # same directory, and memorize it if so.  We can make the output a
   1647     # little more compact by mentioning the directory only once.
   1648     if ($Common_Dir && (scalar (@qunkrefs)) > 1)
   1649     {
   1650       if (! (defined ($common_dir)))
   1651       {
   1652         my ($base, $dir);
   1653         ($base, $dir, undef) = fileparse ($qunkref->filename);
   1654 
   1655         if ((! (defined ($dir)))  # this first case is sheer paranoia
   1656             or ($dir eq '')
   1657             or ($dir eq "./")
   1658             or ($dir eq ".\\"))
   1659         {
   1660           $common_dir = '';
   1661         }
   1662         else
   1663         {
   1664           $common_dir = $dir;
   1665         }
   1666       }
   1667       elsif ($common_dir ne '')
   1668       {
   1669         # Already have a common dir prefix, so how much of it can we preserve?
   1670         $common_dir = &main::common_path_prefix ($qunkref->filename, $common_dir);
   1671       }
   1672     }
   1673     else  # only one file in this entry anyway, so common dir not an issue
   1674     {
   1675       $common_dir = '';
   1676     }
   1677 
   1678     if (defined ($qunkref->branch)) {
   1679       $all_branches->{$qunkref->branch} = 1;
   1680     }
   1681     if (defined ($qunkref->tags)) {
   1682       foreach my $tag (@{$qunkref->tags}) {
   1683         $non_unanimous_tags->{$tag} = 1;
   1684       }
   1685     }
   1686   }
   1687 
   1688   # Any tag held by all qunks will be printed specially... but only if
   1689   # there are multiple qunks in the first place!
   1690   if ((scalar (@qunkrefs)) > 1) {
   1691     foreach my $tag (keys (%$non_unanimous_tags)) {
   1692       my $everyone_has_this_tag = 1;
   1693       foreach my $qunkref (@qunkrefs) {
   1694         if ((! (defined ($qunkref->tags)))
   1695             or (! (grep ($_ eq $tag, @{$qunkref->tags})))) {
   1696           $everyone_has_this_tag = 0;
   1697         }
   1698       }
   1699       if ($everyone_has_this_tag) {
   1700         $unanimous_tags->{$tag} = 1;
   1701         delete $non_unanimous_tags->{$tag};
   1702       }
   1703     }
   1704   }
   1705 
   1706   return $common_dir, \@qunkrefs;
   1707 }
   1708 
   1709 # -------------------------------------
   1710 
   1711 sub fdatetime {
   1712   my $self = shift;
   1713 
   1714   my ($year, $mday, $mon, $wday, $hour, $min);
   1715 
   1716   if ( @_ > 1 ) {
   1717     ($year, $mday, $mon, $wday, $hour, $min) = @_;
   1718   } else {
   1719     my ($time) = @_;
   1720     (undef, $min, $hour, $mday, $mon, $year, $wday) =
   1721       $UTC_Times ? gmtime($time) : localtime($time);
   1722 
   1723     $year += 1900;
   1724     $mon  += 1;
   1725     $wday  = $self->wday($wday);
   1726   }
   1727 
   1728   my $fdate = $self->fdate($year, $mon, $mday, $wday);
   1729 
   1730   if ($Show_Times) {
   1731     my $ftime = $self->ftime($hour, $min);
   1732     return "$fdate $ftime";
   1733   } else {
   1734     return $fdate;
   1735   }
   1736 }
   1737 
   1738 # -------------------------------------
   1739 
   1740 sub fdate {
   1741   my $self = shift;
   1742 
   1743   my ($year, $mday, $mon, $wday);
   1744 
   1745   if ( @_ > 1 ) {
   1746     ($year, $mon, $mday, $wday) = @_;
   1747   } else {
   1748     my ($time) = @_;
   1749     (undef, undef, undef, $mday, $mon, $year, $wday) =
   1750       $UTC_Times ? gmtime($time) : localtime($time);
   1751 
   1752     $year += 1900;
   1753     $mon  += 1;
   1754     $wday  = $self->wday($wday);
   1755   }
   1756 
   1757   return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
   1758 }
   1759 
   1760 # -------------------------------------
   1761 
   1762 sub ftime {
   1763   my $self = shift;
   1764 
   1765   my ($hour, $min);
   1766 
   1767   if ( @_ > 1 ) {
   1768     ($hour, $min) = @_;
   1769   } else {
   1770     my ($time) = @_;
   1771     (undef, $min, $hour) = $UTC_Times ? gmtime($time) : localtime($time);
   1772   }
   1773 
   1774   return sprintf '%02u:%02u', $hour, $min;
   1775 }
   1776 
   1777 # ----------------------------------------------------------------------------
   1778 
   1779 package CVS::Utils::ChangeLog::Message;
   1780 
   1781 sub new {
   1782   my $class = shift;
   1783   my ($msg) = @_;
   1784 
   1785   my %self = (msg => $msg, files => []);
   1786 
   1787   bless \%self, $class;
   1788 }
   1789 
   1790 sub add_fileentry {
   1791   my $self = shift;
   1792   my ($fileentry) = @_;
   1793 
   1794   die "Not a fileentry: $fileentry"
   1795     unless $fileentry->isa('CVS::Utils::ChangeLog::FileEntry');
   1796 
   1797   push @{$self->{files}}, $fileentry;
   1798 }
   1799 
   1800 sub files { wantarray ? @{$_[0]->{files}} : $_[0]->{files} }
   1801 
   1802 # ----------------------------------------------------------------------------
   1803 
   1804 package CVS::Utils::ChangeLog::FileEntry;
   1805 
   1806 use File::Basename qw( fileparse );
   1807 
   1808 # Each revision of a file has a little data structure (a `qunk')
   1809 # associated with it.  That data structure holds not only the
   1810 # file's name, but any additional information about the file
   1811 # that might be needed in the output, such as the revision
   1812 # number, tags, branches, etc.  The reason to have these things
   1813 # arranged in a data structure, instead of just appending them
   1814 # textually to the file's name, is that we may want to do a
   1815 # little rearranging later as we write the output.  For example,
   1816 # all the files on a given tag/branch will go together, followed
   1817 # by the tag in parentheses (so trunk or otherwise non-tagged
   1818 # files would go at the end of the file list for a given log
   1819 # message).  This rearrangement is a lot easier to do if we
   1820 # don't have to reparse the text.
   1821 #
   1822 # A qunk looks like this:
   1823 #
   1824 #   {
   1825 #     filename    =>    "hello.c",
   1826 #     revision    =>    "1.4.3.2",
   1827 #     time        =>    a timegm() return value (moment of commit)
   1828 #     tags        =>    [ "tag1", "tag2", ... ],
   1829 #     branch      =>    "branchname" # There should be only one, right?
   1830 #     roots       =>    [ "branchtag1", "branchtag2", ... ]
   1831 #     lines       =>    "+x -y" # or undefined; x and y are integers
   1832 #   }
   1833 
   1834 # Single top-level ChangeLog, or one per subdirectory?
   1835 my $distributed;
   1836 sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; }
   1837 
   1838 sub new {
   1839   my $class = shift;
   1840   my ($path, $time, $revision, $state, $lines,
   1841       $branch_names, $branch_roots, $branch_numbers, $symbolic_names) = @_;
   1842 
   1843   my %self = (time     => $time,
   1844               revision => $revision,
   1845               state    => $state,
   1846               lines    => $lines,
   1847               branch_numbers => $branch_numbers,
   1848              );
   1849 
   1850   if ( $distributed ) {
   1851     @self{qw(filename dir_key)} = fileparse($path);
   1852   } else {
   1853     @self{qw(filename dir_key)} = ($path, './');
   1854   }
   1855 
   1856   { # Scope for $branch_prefix
   1857     (my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/));
   1858     $branch_prefix =~ s/\.$//;
   1859     if ( $branch_names->{$branch_prefix} ) {
   1860       my $branch_name = $branch_names->{$branch_prefix};
   1861       $self{branch}   = $branch_name;
   1862       $self{branches} = [$branch_name];
   1863     }
   1864     while ( $branch_prefix =~ s/^(\d+(?:\.\d+\.\d+)+)\.\d+\.\d+$/$1/ ) {
   1865       push @{$self{branches}}, $branch_names->{$branch_prefix}
   1866         if exists $branch_names->{$branch_prefix};
   1867     }
   1868   }
   1869 
   1870   # If there's anything in the @branch_roots array, then this
   1871   # revision is the root of at least one branch.  We'll display
   1872   # them as branch names instead of revision numbers, the
   1873   # substitution for which is done directly in the array:
   1874   $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ]
   1875     if @$branch_roots;
   1876 
   1877   if ( exists $symbolic_names->{$revision} ) {
   1878     $self{tags} = delete $symbolic_names->{$revision};
   1879     &main::delta_check($time, $self{tags});
   1880   }
   1881 
   1882   bless \%self, $class;
   1883 }
   1884 
   1885 sub filename       { $_[0]->{filename}       }
   1886 sub dir_key        { $_[0]->{dir_key}        }
   1887 sub revision       { $_[0]->{revision}       }
   1888 sub branch         { $_[0]->{branch}         }
   1889 sub state          { $_[0]->{state}          }
   1890 sub lines          { $_[0]->{lines}          }
   1891 sub roots          { $_[0]->{roots}          }
   1892 sub branch_numbers { $_[0]->{branch_numbers} }
   1893 
   1894 sub tags        { $_[0]->{tags}     }
   1895 sub tags_exists {
   1896   exists $_[0]->{tags};
   1897 }
   1898 
   1899 # This may someday be used in a more sophisticated calculation of what other
   1900 # files are involved in this commit.  For now, we don't use it much except for
   1901 # delta mode, because the common-commit-detection algorithm is hypothesized to
   1902 # be "good enough" as it stands.
   1903 sub time     { $_[0]->{time}     }
   1904 
   1905 # ----------------------------------------------------------------------------
   1906 
   1907 package CVS::Utils::ChangeLog::EntrySetBuilder;
   1908 
   1909 use File::Basename qw( fileparse );
   1910 use Time::Local    qw( timegm );
   1911 
   1912 use constant MAILNAME => "/etc/mailname";
   1913 
   1914 # In 'cvs log' output, one long unbroken line of equal signs separates files:
   1915 use constant FILE_SEPARATOR => '=' x 77;# . "\n";
   1916 # In 'cvs log' output, a shorter line of dashes separates log messages within
   1917 # a file:
   1918 use constant REV_SEPARATOR  => '-' x 28;# . "\n";
   1919 
   1920 use constant EMPTY_LOG_MESSAGE => '*** empty log message ***';
   1921 
   1922 # -------------------------------------
   1923 
   1924 sub new {
   1925   my ($proto) = @_;
   1926   my $class = ref $proto || $proto;
   1927 
   1928   my $poobah  = CVS::Utils::ChangeLog::EntrySet->new;
   1929   my $self = bless +{ grand_poobah => $poobah }, $class;
   1930 
   1931   $self->clear_file;
   1932   $self->maybe_read_user_map_file;
   1933   return $self;
   1934 }
   1935 
   1936 # -------------------------------------
   1937 
   1938 sub clear_msg {
   1939   my ($self) = @_;
   1940 
   1941   # Make way for the next message
   1942   undef $self->{rev_msg};
   1943   undef $self->{rev_time};
   1944   undef $self->{rev_revision};
   1945   undef $self->{rev_author};
   1946   undef $self->{rev_state};
   1947   undef $self->{lines};
   1948   $self->{rev_branch_roots} = [];       # For showing which files are branch
   1949                                         # ancestors.
   1950   $self->{collecting_symbolic_names} = 0;
   1951 }
   1952 
   1953 # -------------------------------------
   1954 
   1955 sub clear_file {
   1956   my ($self) = @_;
   1957   $self->clear_msg;
   1958 
   1959   undef $self->{filename};
   1960   $self->{branch_names}   = +{};        # We'll grab branch names while we're
   1961                                         # at it.
   1962   $self->{branch_numbers} = +{};        # Save some revisions for
   1963                                         # @Follow_Branches
   1964   $self->{symbolic_names} = +{};        # Where tag names get stored.
   1965 }
   1966 
   1967 # -------------------------------------
   1968 
   1969 sub grand_poobah { $_[0]->{grand_poobah} }
   1970 
   1971 # -------------------------------------
   1972 
   1973 sub read_changelog {
   1974   my ($self, $command) = @_;
   1975 
   1976   local (*READER);
   1977   my $pid;
   1978   if (! $Input_From_Stdin) {
   1979     if ($^O =~ /Win32/i) {
   1980       open (READER, "@$command |")
   1981         or die "unable to run \"@$command\"";
   1982     }
   1983     else {
   1984       local (*WRITER);
   1985       pipe(READER, WRITER)
   1986         or die "Couldn't form pipe: $!\n";
   1987       $pid = fork;
   1988       if (! defined $pid) {
   1989         die "Couldn't fork: $!\n";
   1990       }
   1991       if ( ! $pid ) { # child
   1992         open STDOUT, '>&=' . fileno WRITER
   1993           or die "Couldn't dup stderr to ", fileno WRITER, "\n";
   1994         # strangely, some perls give spurious warnings about STDIN being opened
   1995         # for output only these close calls precede the STDOUT reopen above.
   1996         # I think they must be reusing fd 1.
   1997         close READER;
   1998         close STDIN;
   1999 
   2000         exec @$command;
   2001       }
   2002 
   2003       close WRITER;
   2004     }
   2005 
   2006     &main::debug ("(run \"@$command\")\n");
   2007   }
   2008   else {
   2009     open READER, '-' or die "unable to open stdin for reading";
   2010   }
   2011 
   2012   binmode READER;
   2013 
   2014  XX_Log_Source:
   2015   while (<READER>) {
   2016     chomp;
   2017     s!\r$!!;
   2018 
   2019     # If on a new file and don't see filename, skip until we find it, and
   2020     # when we find it, grab it.
   2021     if ( ! defined $self->{filename} ) {
   2022       $self->read_file_path($_);
   2023     } elsif ( /^symbolic names:$/ ) {
   2024       $self->{collecting_symbolic_names} = 1;
   2025     } elsif ( $self->{collecting_symbolic_names} ) {
   2026       $self->read_symbolic_name($_);
   2027     } elsif ( $_ eq FILE_SEPARATOR and ! defined $self->{rev_revision} ) {
   2028       $self->clear_file;
   2029     } elsif ( ! defined $self->{rev_revision} ) {
   2030         # If have file name, but not revision, and see revision, then grab
   2031         # it.  (We collect unconditionally, even though we may or may not
   2032         # ever use it.)
   2033       $self->read_revision($_);
   2034     } elsif ( ! defined $self->{rev_time} ) { # and /^date: /) {
   2035       $self->read_date_author_and_state($_);
   2036     } elsif ( /^branches:\s+(.*);$/ ) {
   2037       $self->read_branches($1);
   2038     } elsif ( ! ( $_ eq FILE_SEPARATOR or $_ eq REV_SEPARATOR ) ) {
   2039       # If have file name, time, and author, then we're just grabbing
   2040       # log message texts:
   2041       $self->{rev_msg} .= $_ . "\n";   # Normally, just accumulate the message...
   2042     } else {
   2043       my $noadd = 0;
   2044       if ( ! $self->{rev_msg}
   2045            or $self->{rev_msg} =~ /^\s*(\.\s*)?$/
   2046            or index($self->{rev_msg}, EMPTY_LOG_MESSAGE) > -1 ) {
   2047         # ... until a msg separator is encountered:
   2048         # Ensure the message contains something:
   2049         $self->clear_msg, $noadd = 1
   2050           if $Prune_Empty_Msgs;
   2051         $self->{rev_msg} = "[no log message]\n";
   2052       }
   2053 
   2054       $self->add_file_entry
   2055         unless $noadd;
   2056 
   2057       if ( $_ eq FILE_SEPARATOR ) {
   2058         $self->clear_file;
   2059       } else {
   2060         $self->clear_msg;
   2061       }
   2062     }
   2063   }
   2064 
   2065   close READER
   2066     or die "Couldn't close pipe reader: $!\n";
   2067   if ( defined $pid ) {
   2068     my $rv;
   2069     waitpid $pid, 0;
   2070     0 == $?
   2071       or $!=1, die sprintf("Problem reading log input (pid/exit/signal/core: %d/%d/%d/%d)\n",
   2072                            $pid, $? >> 8, $? & 127, $? & 128);
   2073   }
   2074   return;
   2075 }
   2076 
   2077 # -------------------------------------
   2078 
   2079 sub add_file_entry {
   2080   $_[0]->grand_poobah->add_fileentry(@{$_[0]}{qw(filename rev_time rev_revision
   2081                                                  rev_state lines branch_names
   2082                                                  rev_branch_roots
   2083                                                  branch_numbers
   2084                                                  symbolic_names
   2085                                                  rev_author rev_msg)});
   2086 }
   2087 
   2088 # -------------------------------------
   2089 
   2090 sub maybe_read_user_map_file {
   2091   my ($self) = @_;
   2092 
   2093   my %expansions;
   2094   my $User_Map_Input;
   2095 
   2096   if (defined $User_Passwd_File)
   2097   {
   2098     if ( ! defined $Domain ) {
   2099       if ( -e MAILNAME ) {
   2100         chomp($Domain = slurp_file(MAILNAME));
   2101       } else {
   2102       MAILDOMAIN_CMD:
   2103         for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
   2104           my ($text, $exit, $sig, $core) = run_ext($_);
   2105           if ( $exit == 0 && $sig == 0 && $core == 0 ) {
   2106             chomp $text;
   2107             if ( length $text ) {
   2108               $Domain = $text;
   2109               last MAILDOMAIN_CMD;
   2110             }
   2111           }
   2112         }
   2113       }
   2114     }
   2115 
   2116     die "No mail domain found\n"
   2117       unless defined $Domain;
   2118 
   2119     open (MAPFILE, "<$User_Passwd_File")
   2120         or die ("Unable to open $User_Passwd_File ($!)");
   2121     while (<MAPFILE>)
   2122     {
   2123       # all lines are valid
   2124       my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
   2125       my $expansion = '';
   2126       ($expansion) = split (',', $gecos)
   2127         if defined $gecos && length $gecos;
   2128 
   2129       my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
   2130       $expansions{$username} = "$expansion <$mailname>";
   2131     }
   2132     close (MAPFILE);
   2133   }
   2134 
   2135   if ($User_Map_File)
   2136   {
   2137     if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
   2138          !-f $User_Map_File )
   2139     {
   2140       my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
   2141       $User_Map_Input = "$rsh $1 'cat $2' |";
   2142       &main::debug ("(run \"${User_Map_Input}\")\n");
   2143     }
   2144     else
   2145     {
   2146       $User_Map_Input = "<$User_Map_File";
   2147     }
   2148 
   2149     open (MAPFILE, $User_Map_Input)
   2150         or die ("Unable to open $User_Map_File ($!)");
   2151 
   2152     while (<MAPFILE>)
   2153     {
   2154       next if /^\s*#/;  # Skip comment lines.
   2155       next if not /:/;  # Skip lines without colons.
   2156 
   2157       # It is now safe to split on ':'.
   2158       my ($username, $expansion) = split ':';
   2159       chomp $expansion;
   2160       $expansion =~ s/^'(.*)'$/$1/;
   2161       $expansion =~ s/^"(.*)"$/$1/;
   2162 
   2163       # If it looks like the expansion has a real name already, then
   2164       # we toss the username we got from CVS log.  Otherwise, keep
   2165       # it to use in combination with the email address.
   2166 
   2167       if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
   2168         # Also, add angle brackets if none present
   2169         if (! ($expansion =~ /<\S+@\S+>/)) {
   2170           $expansions{$username} = "$username <$expansion>";
   2171         }
   2172         else {
   2173           $expansions{$username} = "$username $expansion";
   2174         }
   2175       }
   2176       else {
   2177         $expansions{$username} = $expansion;
   2178       }
   2179     } # fi ($User_Map_File)
   2180 
   2181     close (MAPFILE);
   2182   }
   2183 
   2184  $self->{usermap} = \%expansions;
   2185 }
   2186 
   2187 # -------------------------------------
   2188 
   2189 sub read_file_path {
   2190   my ($self, $line) = @_;
   2191 
   2192   my $path;
   2193 
   2194   if ( $line =~ /^Working file: (.*)/ ) {
   2195     $path = $1;
   2196   } elsif ( defined $RCS_Root
   2197             and
   2198             $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
   2199     $path = $1;
   2200     $path =~ s!Attic/!!;
   2201   } else {
   2202     return;
   2203   }
   2204 
   2205   if ( @Ignore_Files ) {
   2206     my $base;
   2207     ($base, undef, undef) = fileparse($path);
   2208 
   2209     my $xpath = $Case_Insensitive ? lc($path) : $path;
   2210     return
   2211       if grep $path =~ /$_/, @Ignore_Files;
   2212   }
   2213 
   2214   $self->{filename} = $path;
   2215   return;
   2216 }
   2217 
   2218 # -------------------------------------
   2219 
   2220 sub read_symbolic_name {
   2221   my ($self, $line) = @_;
   2222 
   2223   # All tag names are listed with whitespace in front in cvs log
   2224   # output; so if see non-whitespace, then we're done collecting.
   2225   if ( /^\S/ ) {
   2226     $self->{collecting_symbolic_names} = 0;
   2227     return;
   2228   } else {
   2229     # we're looking at a tag name, so parse & store it
   2230 
   2231     # According to the Cederqvist manual, in node "Tags", tag names must start
   2232     # with an uppercase or lowercase letter and can contain uppercase and
   2233     # lowercase letters, digits, `-', and `_'.  However, it's not our place to
   2234     # enforce that, so we'll allow anything CVS hands us to be a tag:
   2235     my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);
   2236 
   2237     # A branch number either has an odd number of digit sections
   2238     # (and hence an even number of dots), or has ".0." as the
   2239     # second-to-last digit section.  Test for these conditions.
   2240     my $real_branch_rev = '';
   2241     if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/             # Even number of dots...
   2242          and
   2243          $tag_rev !~ /^(1\.)+1$/ ) {                  # ...but not "1.[1.]1"
   2244       $real_branch_rev = $tag_rev;
   2245     } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) {  # Has ".0."
   2246       $real_branch_rev = $1 . $3;
   2247     }
   2248 
   2249     # If we got a branch, record its number.
   2250     if ( $real_branch_rev ) {
   2251       $self->{branch_names}->{$real_branch_rev} = $tag_name;
   2252       $self->{branch_numbers}->{$tag_name} = $real_branch_rev;
   2253     } else {
   2254       # Else it's just a regular (non-branch) tag.
   2255       push @{$self->{symbolic_names}->{$tag_rev}}, $tag_name;
   2256     }
   2257   }
   2258 
   2259   $self->{collecting_symbolic_names} = 1;
   2260   return;
   2261 }
   2262 
   2263 # -------------------------------------
   2264 
   2265 sub read_revision {
   2266   my ($self, $line) = @_;
   2267 
   2268   my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ );
   2269 
   2270   return
   2271     unless $revision;
   2272 
   2273   $self->{rev_revision} = $revision;
   2274   return;
   2275 }
   2276 
   2277 # -------------------------------------
   2278 
   2279 { # Closure over %gecos_warned
   2280 my %gecos_warned;
   2281 sub read_date_author_and_state {
   2282   my ($self, $line) = @_;
   2283 
   2284   my ($time, $author, $state) = $self->parse_date_author_and_state($line);
   2285 
   2286   if ( defined($self->{usermap}->{$author}) and $self->{usermap}->{$author} ) {
   2287     $author = $self->{usermap}->{$author};
   2288   } elsif ( defined $Domain or $Gecos == 1 ) {
   2289     my $email = $author;
   2290     $email = $author."@".$Domain
   2291       if defined $Domain && $Domain ne '';
   2292 
   2293     my $pw = getpwnam($author);
   2294     my ($fullname, $office, $workphone, $homephone, $gcos);
   2295     if ( defined $pw ) {
   2296       $gcos = (getpwnam($author))[6];
   2297       ($fullname, $office, $workphone, $homephone) =
   2298         split /\s*,\s*/, $gcos;
   2299     } else {
   2300       warn "Couldn't find gecos info for author '$author'\n"
   2301         unless $gecos_warned{$author}++;
   2302       $fullname = '';
   2303     }
   2304     for (grep defined, $fullname, $office, $workphone, $homephone) {
   2305       s/&/ucfirst(lc($pw))/ge;
   2306     }
   2307     $author = $fullname . "  <" . $email . ">"
   2308       if defined $fullname && $fullname ne '';
   2309   }
   2310 
   2311   $self->{rev_state}  = $state;
   2312   $self->{rev_time}   = $time;
   2313   $self->{rev_author} = $author;
   2314   return;
   2315 }
   2316 }
   2317 
   2318 # -------------------------------------
   2319 
   2320 sub read_branches {
   2321   # A "branches: ..." line here indicates that one or more branches
   2322   # are rooted at this revision.  If we're showing branches, then we
   2323   # want to show that fact as well, so we collect all the branches
   2324   # that this is the latest ancestor of and store them in
   2325   # $self->[rev_branch_roots}.  Just for reference, the format of the
   2326   # line we're seeing at this point is:
   2327   #
   2328   #    branches:  1.5.2;  1.5.4;  ...;
   2329   #
   2330   # Okay, here goes:
   2331   my ($self, $line) = @_;
   2332 
   2333   # Ugh.  This really bothers me.  Suppose we see a log entry
   2334   # like this:
   2335   #
   2336   #    ----------------------------
   2337   #    revision 1.1
   2338   #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp;
   2339   #    branches:  1.1.2;
   2340   #    Intended first line of log message begins here.
   2341   #    ----------------------------
   2342   #
   2343   # The question is, how we can tell the difference between that
   2344   # log message and a *two*-line log message whose first line is
   2345   #
   2346   #    "branches:  1.1.2;"
   2347   #
   2348   # See the problem?  The output of "cvs log" is inherently
   2349   # ambiguous.
   2350   #
   2351   # For now, we punt: we liberally assume that people don't
   2352   # write log messages like that, and just toss a "branches:"
   2353   # line if we see it but are not showing branches.  I hope no
   2354   # one ever loses real log data because of this.
   2355   if ( $Show_Branches ) {
   2356     $line =~ s/(1\.)+1;|(1\.)+1$//;  # ignore the trivial branch 1.1.1
   2357     $self->{rev_branch_roots} = [split /;\s+/, $line]
   2358       if length $line;
   2359   }
   2360 }
   2361 
   2362 # -------------------------------------
   2363 
   2364 sub parse_date_author_and_state {
   2365   my ($self, $line) = @_;
   2366   # Parses the date/time and author out of a line like:
   2367   #
   2368   # date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
   2369   #
   2370   # or, in CVS 1.12.9:
   2371   #
   2372   # date: 2004-06-05 16:10:32 +0000; author: somebody; state: Exp;
   2373 
   2374   my ($year, $mon, $mday, $hours, $min, $secs, $utcOffset, $author, $state, $rest) =
   2375     $line =~
   2376       m!(\d+)[-/](\d+)[-/](\d+)\s+(\d+):(\d+):(\d+)(\s+[+-]\d{4})?;\s+
   2377         author:\s+([^;]+);\s+state:\s+([^;]+);(.*)!x
   2378     or  die "Couldn't parse date ``$line''";
   2379   die "Bad date or Y2K issues"
   2380     unless $year > 1969 and $year < 2258;
   2381   # Kinda arbitrary, but useful as a sanity check
   2382   my $time = timegm($secs, $min, $hours, $mday, $mon-1, $year-1900);
   2383   if ( defined $utcOffset ) {
   2384     my ($plusminus, $hour, $minute) = ($utcOffset =~ m/([+-])(\d\d)(\d\d)/);
   2385     my $offset = (($hour * 60) + $minute) * 60 * ($plusminus eq '+' ? -1 : 1);
   2386     $time += $offset;
   2387   }
   2388   if ( $rest =~ m!\s+lines:\s+(.*)! ) {
   2389     $self->{lines} = $1;
   2390   }
   2391 
   2392   return $time, $author, $state;
   2393 }
   2394 
   2395 # Subrs ----------------------------------------------------------------------
   2396 
   2397 package main;
   2398 
   2399 sub delta_check {
   2400   my ($time, $tags) = @_;
   2401 
   2402   # If we're in 'delta' mode, update the latest observed times for the
   2403   # beginning and ending tags, and when we get around to printing output, we
   2404   # will simply restrict ourselves to that timeframe...
   2405   return
   2406     unless $Delta_Mode;
   2407 
   2408   $Delta_StartTime = $time
   2409     if $time > $Delta_StartTime and $Delta_From and grep { $_ eq $Delta_From } @$tags;
   2410 
   2411   $Delta_EndTime = $time
   2412     if $time > $Delta_EndTime and $Delta_To and grep { $_ eq $Delta_To } @$tags;
   2413 }
   2414 
   2415 sub run_ext {
   2416   my ($cmd) = @_;
   2417   $cmd = [$cmd]
   2418     unless ref $cmd;
   2419   local $" = ' ';
   2420   my $out = qx"@$cmd 2>&1";
   2421   my $rv  = $?;
   2422   my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8);
   2423   return $out, $exit, $sig, $core;
   2424 }
   2425 
   2426 # -------------------------------------
   2427 
   2428 # If accumulating, grab the boundary date from pre-existing ChangeLog.
   2429 sub maybe_grab_accumulation_date {
   2430   if (! $Cumulative || $Update) {
   2431     return '';
   2432   }
   2433 
   2434   # else
   2435 
   2436   open (LOG, "$Log_File_Name")
   2437       or die ("trouble opening $Log_File_Name for reading ($!)");
   2438 
   2439   my $boundary_date;
   2440   while (<LOG>)
   2441   {
   2442     if (/^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/)
   2443     {
   2444       $boundary_date = "$1";
   2445       last;
   2446     }
   2447   }
   2448 
   2449   close (LOG);
   2450 
   2451   # convert time from utc to local timezone if the ChangeLog has
   2452   # dates/times in utc
   2453   if ($UTC_Times && $boundary_date)
   2454   {
   2455     # convert the utc time to a time value
   2456     my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
   2457       m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
   2458     my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
   2459     # print the timevalue in the local timezone
   2460     my ($ignore,$wday);
   2461     ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
   2462     $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
   2463                             $year+1900,$mon+1,$mday,$hour,$min);
   2464   }
   2465 
   2466   return $boundary_date;
   2467 }
   2468 
   2469 # -------------------------------------
   2470 
   2471 # Fills up a ChangeLog structure in the current directory.
   2472 sub derive_changelog {
   2473   my ($command) = @_;
   2474 
   2475   # See "The Plan" above for a full explanation.
   2476 
   2477   # Might be adding to an existing ChangeLog
   2478   my $accumulation_date = maybe_grab_accumulation_date;
   2479   if ($accumulation_date) {
   2480     # Insert -d immediately after 'cvs log'
   2481     my $Log_Date_Command = "-d>${accumulation_date}";
   2482 
   2483     my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command;
   2484     splice @$command, $log_index+1, 0, $Log_Date_Command;
   2485     &debug ("(adding log msg starting from $accumulation_date)\n");
   2486   }
   2487 
   2488 #  output_changelog(read_changelog($command));
   2489   my $builder = CVS::Utils::ChangeLog::EntrySetBuilder->new;
   2490   $builder->read_changelog($command);
   2491   $builder->grand_poobah->output_changelog;
   2492 }
   2493 
   2494 # -------------------------------------
   2495 
   2496 sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
   2497 
   2498 # -------------------------------------
   2499 
   2500 sub common_path_prefix {
   2501   my ($path1, $path2) = @_;
   2502 
   2503   # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
   2504   # terms, and mould windoze filenames to match.  Is this really appropriate?
   2505   # If a file is checked in under UN*X, and cvs log run on windoze, which way
   2506   # do the path separators slope?  Can we use fileparse as per the local
   2507   # conventions?  If so, we should probably have a user option to specify an
   2508   # OS to emulate to handle stdin-fed logs.  If we did this, we could avoid
   2509   # the nasty \-/ transmogrification below.
   2510 
   2511   my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;
   2512 
   2513   # Transmogrify Windows filenames to look like Unix.
   2514   # (It is far more likely that someone is running cvs2cl.pl under
   2515   # Windows than that they would genuinely have backslashes in their
   2516   # filenames.)
   2517   tr!\\!/!
   2518     for $dir1, $dir2;
   2519 
   2520   my ($accum1, $accum2, $last_common_prefix) = ('') x 3;
   2521 
   2522   my @path1 = grep length($_), split qr!/!, $dir1;
   2523   my @path2 = grep length($_), split qr!/!, $dir2;
   2524 
   2525   my @common_path;
   2526   for (0..min($#path1,$#path2)) {
   2527     if ( $path1[$_] eq $path2[$_]) {
   2528       push @common_path, $path1[$_];
   2529     } else {
   2530       last;
   2531     }
   2532   }
   2533 
   2534   return join '', map "$_/", @common_path;
   2535 }
   2536 
   2537 # -------------------------------------
   2538 
   2539 sub parse_options {
   2540   # Check this internally before setting the global variable.
   2541   my $output_file;
   2542 
   2543   # If this gets set, we encountered unknown options and will exit at
   2544   # the end of this subroutine.
   2545   my $exit_with_admonishment = 0;
   2546 
   2547   # command to generate the log
   2548   my @log_source_command = qw( cvs log );
   2549 
   2550   my (@Global_Opts, @Local_Opts);
   2551 
   2552   Getopt::Long::Configure(qw( bundling permute no_getopt_compat
   2553                               pass_through no_ignore_case ));
   2554   GetOptions('help|usage|h'   => \$Print_Usage,
   2555              'debug'          => \$Debug,        # unadvertised option, heh
   2556              'version'        => \$Print_Version,
   2557 
   2558              'file|f=s'       => \$output_file,
   2559              'accum'          => \$Cumulative,
   2560              'update'         => \$Update,
   2561              'fsf'            => \$FSF_Style,
   2562              'rcs=s'          => \$RCS_Root,
   2563              'usermap|U=s'    => \$User_Map_File,
   2564              'gecos'          => \$Gecos,
   2565              'domain=s'       => \$Domain,
   2566              'passwd=s'       => \$User_Passwd_File,
   2567              'window|W=i'     => \$Max_Checkin_Duration,
   2568              'chrono'         => \$Chronological_Order,
   2569              'ignore|I=s'     => \@Ignore_Files,
   2570              'case-insensitive|C' => \$Case_Insensitive,
   2571              'regexp|R=s'     => \$Regexp_Gate,
   2572              'stdin'          => \$Input_From_Stdin,
   2573              'stdout'         => \$Output_To_Stdout,
   2574              'distributed|d'  => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
   2575              'prune|P'        => \$Prune_Empty_Msgs,
   2576              'no-wrap'        => \$No_Wrap,
   2577              'gmt|utc'        => \$UTC_Times,
   2578              'day-of-week|w'  => \$Show_Day_Of_Week,
   2579              'revisions|r'    => \$Show_Revisions,
   2580              'show-dead'      => \$Show_Dead,
   2581              'tags|t'         => \$Show_Tags,
   2582              'tag-regexp=s'   => \$Regexp_Tag,
   2583              'tagdates|T'     => \$Show_Tag_Dates,
   2584              'branches|b'     => \$Show_Branches,
   2585              'follow|F=s'     => \@Follow_Branches,
   2586              'follow-only=s'  => \@Follow_Only,
   2587              'xml-encoding=s' => \$XML_Encoding,
   2588              'xml'            => \$XML_Output,
   2589              'noxmlns'        => \$No_XML_Namespace,
   2590              'no-xml-iso-date' => \$No_XML_ISO_Date,
   2591              'no-ancestors'   => \$No_Ancestors,
   2592              'lines-modified' => \$Show_Lines_Modified,
   2593 
   2594              'no-indent'    => sub {
   2595                $Indent = '';
   2596              },
   2597 
   2598              'summary'      => sub {
   2599                $Summary = 1;
   2600                $After_Header = "\n\n"; # Summary implies --separate-header
   2601              },
   2602 
   2603              'no-times'     => sub {
   2604                $Show_Times = 0;
   2605              },
   2606 
   2607              'no-hide-branch-additions' => sub {
   2608                $Hide_Branch_Additions = 0;
   2609              },
   2610 
   2611              'no-common-dir'  => sub {
   2612                $Common_Dir = 0;
   2613              },
   2614 
   2615              'ignore-tag=s'   => sub {
   2616                $ignore_tags{$_[1]} = 1;
   2617              },
   2618 
   2619              'show-tag=s'     => sub {
   2620                $show_tags{$_[1]} = 1;
   2621              },
   2622 
   2623              # Deliberately undocumented.  This is not a public interface, and
   2624              # may change/disappear at any time.
   2625              'test-code=s'    => \$TestCode,
   2626 
   2627              'delta=s'        => sub {
   2628                my $arg = $_[1];
   2629                if ( $arg =~
   2630                     /^([A-Za-z][A-Za-z0-9_\-\]\[\.]*)?:([A-Za-z][A-Za-z0-9_\-\]\[\.]*)?$/ )
   2631                {
   2632                  $Delta_From = $1;
   2633                  $Delta_To = $2;
   2634                  $Delta_Mode = 1;
   2635                } else {
   2636                  die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
   2637                }
   2638              },
   2639 
   2640              'FSF'             => sub {
   2641                $FSF_Output = 1;
   2642                $Show_Times = 0;
   2643                $Common_Dir = 0;
   2644                $No_Extra_Indent = 1;
   2645                $Indent = "\t";
   2646              },
   2647 
   2648              'header=s'        => sub {
   2649                my $narg = $_[1];
   2650                $ChangeLog_Header = &slurp_file ($narg);
   2651                if (! defined ($ChangeLog_Header)) {
   2652                  $ChangeLog_Header = '';
   2653                }
   2654              },
   2655 
   2656              'global-opts|g=s' => sub {
   2657                my $narg = $_[1];
   2658                push @Global_Opts, $narg;
   2659                splice @log_source_command, 1, 0, $narg;
   2660              },
   2661 
   2662              'log-opts|l=s' => sub {
   2663                my $narg = $_[1];
   2664                push @Local_Opts, $narg;
   2665                push @log_source_command, $narg;
   2666              },
   2667 
   2668              'mailname=s'   => sub {
   2669                my $narg = $_[1];
   2670                warn "--mailname is deprecated; please use --domain instead\n";
   2671                $Domain = $narg;
   2672              },
   2673 
   2674              'separate-header|S' => sub {
   2675                $After_Header = "\n\n";
   2676                $No_Extra_Indent = 1;
   2677              },
   2678 
   2679              'group-by-date' => sub {
   2680                $GroupByDate = 1;
   2681                $Show_Times = 0;
   2682              },
   2683   
   2684              'group-by-author' => sub {
   2685                $GroupByDate = 1;
   2686                $GroupByAuthor = 1;
   2687                $Show_Times = 0;
   2688              },
   2689 
   2690              'hide-filenames' => sub {
   2691                $Hide_Filenames = 1;
   2692                $After_Header = '';
   2693              },
   2694             )
   2695     or die "options parsing failed\n";
   2696 
   2697   push @log_source_command, map "$_", @ARGV;
   2698 
   2699   ## Check for contradictions...
   2700 
   2701   if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
   2702     print STDERR "cannot pass both --stdout and --distributed\n";
   2703     $exit_with_admonishment = 1;
   2704   }
   2705 
   2706   if ($Output_To_Stdout && $output_file) {
   2707     print STDERR "cannot pass both --stdout and --file\n";
   2708     $exit_with_admonishment = 1;
   2709   }
   2710 
   2711   if ($Input_From_Stdin && @Global_Opts) {
   2712     print STDERR "cannot pass both --stdin and -g\n";
   2713     $exit_with_admonishment = 1;
   2714   }
   2715 
   2716   if ($Input_From_Stdin && @Local_Opts) {
   2717     print STDERR "cannot pass both --stdin and -l\n";
   2718     $exit_with_admonishment = 1;
   2719   }
   2720 
   2721   if ($XML_Output && $Cumulative) {
   2722     print STDERR "cannot pass both --xml and --accum\n";
   2723     $exit_with_admonishment = 1;
   2724   }
   2725 
   2726   if ($FSF_Output && $Cumulative) {
   2727     print STDERR "cannot pass both --FSF and --accum\n";
   2728     $exit_with_admonishment = 1;
   2729   }
   2730 
   2731   # Other consistency checks and option-driven logic
   2732 
   2733   # Bleargh.  Compensate for a deficiency of custom wrapping.
   2734   if ( ($After_Header ne " ") and $FSF_Style ) {
   2735     $After_Header .= "\t";
   2736   }
   2737 
   2738   @Ignore_Files = map lc, @Ignore_Files
   2739     if $Case_Insensitive;
   2740 
   2741   # Or if any other error message has already been printed out, we
   2742   # just leave now:
   2743   if ($exit_with_admonishment) {
   2744     &usage ();
   2745     exit (1);
   2746   }
   2747   elsif ($Print_Usage) {
   2748     &usage ();
   2749     exit (0);
   2750   }
   2751   elsif ($Print_Version) {
   2752     &version ();
   2753     exit (0);
   2754   }
   2755 
   2756   ## Else no problems, so proceed.
   2757 
   2758   if ($output_file) {
   2759     $Log_File_Name = $output_file;
   2760   }
   2761 
   2762   return \@log_source_command;
   2763 }
   2764 
   2765 # -------------------------------------
   2766 
   2767 sub slurp_file {
   2768   my $filename = shift || die ("no filename passed to slurp_file()");
   2769   my $retstr;
   2770 
   2771   open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
   2772   local $/ = undef;
   2773   $retstr = <SLURPEE>;
   2774   close (SLURPEE);
   2775   return $retstr;
   2776 }
   2777 
   2778 # -------------------------------------
   2779 
   2780 sub debug {
   2781   if ($Debug) {
   2782     my $msg = shift;
   2783     print STDERR $msg;
   2784   }
   2785 }
   2786 
   2787 # -------------------------------------
   2788 
   2789 sub version {
   2790   print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
   2791 }
   2792 
   2793 # -------------------------------------
   2794 
   2795 sub usage {
   2796   &version ();
   2797 
   2798   eval "use Pod::Usage qw( pod2usage )";
   2799 
   2800    if ( $@ ) {
   2801     print <<'END';
   2802 
   2803 * Pod::Usage was not found.  The formatting may be suboptimal.  Consider
   2804   upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
   2805   versions of perl prior to 5.6 are getting rather rusty, now.  Alternatively,
   2806   install Pod::Usage direct from CPAN.
   2807 END
   2808 
   2809     local $/ = undef;
   2810     my $message = <DATA>;
   2811     $message =~ s/^=(head1|item) //gm;
   2812     $message =~ s/^=(over|back).*\n//gm;
   2813     $message =~ s/\n{3,}/\n\n/g;
   2814     print $message;
   2815   } else {
   2816     print "\n";
   2817     pod2usage( -exitval => 'NOEXIT',
   2818                -verbose => 1,
   2819                -output  => \*STDOUT,
   2820              );
   2821   }
   2822 
   2823   return;
   2824 }
   2825 
   2826 # Main -----------------------------------------------------------------------
   2827 
   2828 my $log_source_command = parse_options;
   2829 if ( defined $TestCode ) {
   2830   eval $TestCode;
   2831   die "Eval failed: '$@'\n"
   2832     if $@;
   2833 } else {
   2834   derive_changelog($log_source_command);
   2835 }
   2836 
   2837 __DATA__
   2838 
   2839 =head1 NAME
   2840 
   2841 cvs2cl.pl - convert cvs log messages to changelogs
   2842 
   2843 =head1 SYNOPSIS
   2844 
   2845 B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
   2846 
   2847 =head1 DESCRIPTION
   2848 
   2849 cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
   2850 running "cvs log" and parsing the output. Duplicate log messages get
   2851 unified in the Right Way.
   2852 
   2853 The default output of cvs2cl is designed to be compact, formally unambiguous,
   2854 but still easy for humans to read.  It should be largely self-explanatory; the
   2855 one abbreviation that might not be obvious is "utags".  That stands for
   2856 "universal tags" -- a universal tag is one held by all the files in a given
   2857 change entry.
   2858 
   2859 If you need output that's easy for a program to parse, use the B<--xml> option.
   2860 Note that with XML output, just about all available information is included
   2861 with each change entry, whether you asked for it or not, on the theory that
   2862 your parser can ignore anything it's not looking for.
   2863 
   2864 If filenames are given as arguments cvs2cl only shows log information for the
   2865 named files.
   2866 
   2867 =head1 OPTIONS
   2868 
   2869 =over 4
   2870 
   2871 =item B<-h>, B<-help>, B<--help>, B<-?>
   2872 
   2873 Show a short help and exit.
   2874 
   2875 =item B<--version>
   2876 
   2877 Show version and exit.
   2878 
   2879 =item B<-r>, B<--revisions>
   2880 
   2881 Show revision numbers in output.
   2882 
   2883 =item B<-b>, B<--branches>
   2884 
   2885 Show branch names in revisions when possible.
   2886 
   2887 =item B<-t>, B<--tags>
   2888 
   2889 Show tags (symbolic names) in output.
   2890 
   2891 =item B<-T>, B<--tagdates>
   2892 
   2893 Show tags in output on their first occurance.
   2894 
   2895 =item B<--show-dead>
   2896 
   2897 Show dead files.
   2898 
   2899 =item B<--stdin>
   2900 
   2901 Read from stdin, don't run cvs log.
   2902 
   2903 =item B<--stdout>
   2904 
   2905 Output to stdout not to ChangeLog.
   2906 
   2907 =item B<-d>, B<--distributed>
   2908 
   2909 Put ChangeLogs in subdirs.
   2910 
   2911 =item B<-f> I<FILE>, B<--file> I<FILE>
   2912 
   2913 Write to I<FILE> instead of ChangeLog.
   2914 
   2915 =item B<--fsf>
   2916 
   2917 Use this if log data is in FSF ChangeLog style.
   2918 
   2919 =item B<--FSF>
   2920 
   2921 Attempt strict FSF-standard compatible output (incompatible with B<--accum>).
   2922 
   2923 =item B<-W> I<SECS>, B<--window> I<SECS>
   2924 
   2925 Window of time within which log entries unify.
   2926 
   2927 =item -B<U> I<UFILE>, B<--usermap> I<UFILE>
   2928 
   2929 Expand usernames to email addresses from I<UFILE>.
   2930 
   2931 =item B<--passwd> I<PASSWORDFILE>
   2932 
   2933 Use system passwd file for user name expansion.  If no mail domain is provided
   2934 (via B<--domain>), it tries to read one from B</etc/mailname>, output of B<hostname
   2935 -d>, B<dnsdomainname>, or B<domain-name>.  cvs2cl exits with an error if none of
   2936 those options is successful. Use a domain of '' to prevent the addition of a
   2937 mail domain.
   2938 
   2939 =item B<--domain> I<DOMAIN>
   2940 
   2941 Domain to build email addresses from.
   2942 
   2943 =item B<--gecos>
   2944 
   2945 Get user information from GECOS data.
   2946 
   2947 =item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
   2948 
   2949 Include only entries that match I<REGEXP>.  This option may be used multiple
   2950 times.
   2951 
   2952 =item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
   2953 
   2954 Ignore files whose names match I<REGEXP>.  This option may be used multiple
   2955 times.  The regexp is a perl regular expression.  It is matched as is; you may
   2956 want to prefix with a ^ or suffix with a $ to anchor the match.
   2957 
   2958 =item B<-C>, B<--case-insensitive>
   2959 
   2960 Any regexp matching is done case-insensitively.
   2961 
   2962 =item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
   2963 
   2964 Show only revisions on or ancestral to I<BRANCH>.
   2965 
   2966 =item B<--follow-only> I<BRANCH>
   2967 
   2968 Like --follow, but sub-branches are not followed.
   2969 
   2970 =item B<--no-ancestors>
   2971 
   2972 When using B<-F>, only track changes since the I<BRANCH> started.
   2973 
   2974 =item B<--no-hide-branch-additions>
   2975 
   2976 By default, entries generated by cvs for a file added on a branch (a dead 1.1
   2977 entry) are not shown.  This flag reverses that action.
   2978 
   2979 =item B<-S>, B<--separate-header>
   2980 
   2981 Blank line between each header and log message.
   2982 
   2983 =item B<--group-by-date>
   2984 
   2985 Group ChangeLog entries on the same date together, instead of having a
   2986 separate entry for each commit on that date.
   2987 
   2988 =item B<--group-by-author>
   2989 
   2990 Group consecutive ChangeLog entries from same author during same date,
   2991 instead of having separate entry for each commit.
   2992 
   2993 =item B<--summary>
   2994 
   2995 Add CVS change summary information.
   2996 
   2997 =item B<--no-wrap>
   2998 
   2999 Don't auto-wrap log message (recommend B<-S> also).
   3000 
   3001 =item B<--no-indent>
   3002 
   3003 Don't indent log message
   3004 
   3005 =item B<--gmt>, B<--utc>
   3006 
   3007 Show times in GMT/UTC instead of local time.
   3008 
   3009 =item B<--accum>
   3010 
   3011 Add to an existing ChangeLog (incompatible with B<--xml> and B<--FSF>).
   3012 
   3013 =item B<-w>, B<--day-of-week>
   3014 
   3015 Show day of week.
   3016 
   3017 =item B<--no-times>
   3018 
   3019 Don't show times in output.
   3020 
   3021 =item B<--chrono>
   3022 
   3023 Output log in chronological order (default is reverse chronological order).
   3024 
   3025 =item B<--header> I<FILE>
   3026 
   3027 Get ChangeLog header from I<FILE> ("B<->" means stdin).
   3028 
   3029 =item B<--xml>
   3030 
   3031 Output XML instead of ChangeLog format (incompatible with B<--accum>).
   3032 
   3033 =item B<--xml-encoding> I<ENCODING.>
   3034 
   3035 Insert encoding clause in XML header.
   3036 
   3037 =item B<--noxmlns>
   3038 
   3039 Don't include xmlns= attribute in root element.
   3040 
   3041 =item B<--hide-filenames>
   3042 
   3043 Don't show filenames (ignored for XML output).
   3044 
   3045 =item B<--no-common-dir>
   3046 
   3047 Don't shorten directory names from filenames.
   3048 
   3049 =item B<--rcs> I<CVSROOT>
   3050 
   3051 Handle filenames from raw RCS, for instance those produced by "cvs rlog"
   3052 output, stripping the prefix I<CVSROOT>.
   3053 
   3054 =item B<-P>, B<--prune>
   3055 
   3056 Don't show empty log messages.
   3057 
   3058 =item B<--lines-modified>
   3059 
   3060 Output the number of lines added and the number of lines removed for
   3061 each checkin (if applicable). At the moment, this only affects the
   3062 XML output mode.
   3063 
   3064 =item B<--ignore-tag> I<TAG>
   3065 
   3066 Ignore individual changes that are associated with a given tag.
   3067 May be repeated, if so, changes that are associated with any of
   3068 the given tags are ignored.
   3069 
   3070 =item B<--show-tag> I<TAG>
   3071 
   3072 Log only individual changes that are associated with a given
   3073 tag.  May be repeated, if so, changes that are associated with
   3074 any of the given tags are logged.
   3075 
   3076 =item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
   3077 
   3078 Attempt a delta between two tags (since I<FROM_TAG> up to and
   3079 including I<TO_TAG>).  The algorithm is a simple date-based one
   3080 (this is a hard problem) so results are imperfect.
   3081 
   3082 =item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
   3083 
   3084 Pass I<OPTS> to cvs like in "cvs I<OPTS> log ...".
   3085 
   3086 =item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
   3087 
   3088 Pass I<OPTS> to cvs log like in "cvs ... log I<OPTS>".
   3089 
   3090 =back
   3091 
   3092 Notes about the options and arguments:
   3093 
   3094 =over 4
   3095 
   3096 =item *
   3097 
   3098 The B<-I> and B<-F> options may appear multiple times.
   3099 
   3100 =item *
   3101 
   3102 To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works).  This is
   3103 okay because no would ever, ever be crazy enough to name a branch "trunk",
   3104 right?  Right.
   3105 
   3106 =item *
   3107 
   3108 For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
   3109 each line of I<UFILE> looks like this:
   3110 
   3111        jrandom:jrandom (at] red-bean.com
   3112 
   3113 or maybe even like this
   3114 
   3115        jrandom:'Jesse Q. Random <jrandom (at] red-bean.com>'
   3116 
   3117 Don't forget to quote the portion after the colon if necessary.
   3118 
   3119 =item *
   3120 
   3121 Many people want to filter by date.  To do so, invoke cvs2cl.pl like this:
   3122 
   3123        cvs2cl.pl -l "-d'DATESPEC'"
   3124 
   3125 where DATESPEC is any date specification valid for "cvs log -d".  (Note that
   3126 CVS 1.10.7 and below requires there be no space between -d and its argument).
   3127 
   3128 =item *
   3129 
   3130 Dates/times are interpreted in the local time zone.
   3131 
   3132 =item *
   3133 
   3134 Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
   3135 spaces as argument separators.
   3136 
   3137 =item *
   3138 
   3139 See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
   3140 systems) for more information.
   3141 
   3142 =item *
   3143 
   3144 Note that the rules for quoting under windows shells are different.
   3145 
   3146 =item *
   3147 
   3148 To run in an automated environment such as CGI or PHP, suidperl may be needed
   3149 in order to execute as the correct user to enable /cvsroot read lock files to
   3150 be written for the 'cvs log' command.  This is likely just a case of changing
   3151 the /usr/bin/perl command to /usr/bin/suidperl, and explicitly declaring the
   3152 PATH variable.
   3153 
   3154 =back
   3155 
   3156 =head1 EXAMPLES
   3157 
   3158 Some examples (working on UNIX shells):
   3159 
   3160       # logs after 6th March, 2003 (inclusive)
   3161       cvs2cl.pl -l "-d'>2003-03-06'"
   3162       # logs after 4:34PM 6th March, 2003 (inclusive)
   3163       cvs2cl.pl -l "-d'>2003-03-06 16:34'"
   3164       # logs between 4:46PM 6th March, 2003 (exclusive) and
   3165       # 4:34PM 6th March, 2003 (inclusive)
   3166       cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
   3167 
   3168 Some examples (on non-UNIX shells):
   3169 
   3170       # Reported to work on windows xp/2000
   3171       cvs2cl.pl -l  "-d"">2003-10-18;today<"""
   3172 
   3173 =head1 AUTHORS
   3174 
   3175 =over 4
   3176 
   3177 =item Karl Fogel
   3178 
   3179 =item Melissa O'Neill
   3180 
   3181 =item Martyn J. Pearce
   3182 
   3183 =back
   3184 
   3185 Contributions from
   3186 
   3187 =over 4
   3188 
   3189 =item Mike Ayers
   3190 
   3191 =item Tim Bradshaw
   3192 
   3193 =item Richard Broberg
   3194 
   3195 =item Nathan Bryant
   3196 
   3197 =item Oswald Buddenhagen
   3198 
   3199 =item Neil Conway
   3200 
   3201 =item Arthur de Jong
   3202 
   3203 =item Mark W. Eichin
   3204 
   3205 =item Dave Elcock
   3206 
   3207 =item Reid Ellis
   3208 
   3209 =item Simon Josefsson
   3210 
   3211 =item Robin Hugh Johnson
   3212 
   3213 =item Terry Kane
   3214 
   3215 =item Pete Kempf
   3216 
   3217 =item Akos Kiss
   3218 
   3219 =item Claus Klein
   3220 
   3221 =item Eddie Kohler
   3222 
   3223 =item Richard Laager
   3224 
   3225 =item Kevin Lilly
   3226 
   3227 =item Karl-Heinz Marbaise
   3228 
   3229 =item Mitsuaki Masuhara
   3230 
   3231 =item Henrik Nordstrom
   3232 
   3233 =item Joe Orton
   3234 
   3235 =item Peter Palfrader
   3236 
   3237 =item Thomas Parmelan
   3238 
   3239 =item Jordan Russell
   3240 
   3241 =item Jacek Sliwerski
   3242 
   3243 =item Johannes Stezenbach
   3244 
   3245 =item Joseph Walton
   3246 
   3247 =item Ernie Zapata
   3248 
   3249 =back
   3250 
   3251 =head1 BUGS
   3252 
   3253 Please report bugs to C<cvs2cl-reports {_AT_} red-bean.com>.
   3254 
   3255 =head1 PREREQUISITES
   3256 
   3257 This script requires C<Text::Wrap>, C<Time::Local>, and C<File::Basename>.  It
   3258 also seems to require C<Perl 5.004_04> or higher.
   3259 
   3260 =head1 OPERATING SYSTEM COMPATIBILITY
   3261 
   3262 Should work on any OS.
   3263 
   3264 =head1 SCRIPT CATEGORIES
   3265 
   3266 Version_Control/CVS
   3267 
   3268 =head1 COPYRIGHT
   3269 
   3270 (C) 2001,2002,2003,2004 Martyn J. Pearce, under the GNU GPL.
   3271 
   3272 (C) 1999 Karl Fogel, under the GNU GPL.
   3273 
   3274 cvs2cl.pl is free software; you can redistribute it and/or modify
   3275 it under the terms of the GNU General Public License as published by
   3276 the Free Software Foundation; either version 2, or (at your option)
   3277 any later version.
   3278 
   3279 cvs2cl.pl is distributed in the hope that it will be useful,
   3280 but WITHOUT ANY WARRANTY; without even the implied warranty of
   3281 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   3282 GNU General Public License for more details.
   3283 
   3284 You may have received a copy of the GNU General Public License
   3285 along with cvs2cl.pl; see the file COPYING.  If not, write to the
   3286 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   3287 Boston, MA 02111-1307, USA.
   3288 
   3289 =head1 SEE ALSO
   3290 
   3291 cvs(1)
   3292 
   3293