#! /usr/bin/perl
#
#
# hyphenex.pl
#
# This small filter converts a hyphenation exception log article for
# TUGBoat to a real \hyphenation block.
#
# Written by Werner Lemberg <wl (at] gnu.org>.
#
# Version 1.1 (2003/04/17)
#
# Public domain.
#
#
# Usage:
#
#   [perl] hyphenex.pl < tugboat-article > hyphenation-exceptions

use strict;
use warnings;

# print header
print "% Hyphenation exceptions for US English,\n";
print "% based on the hyphenation exception log article in TUGBoat.\n";
print "%\n";
print "% This is an automatically generated file. Do not edit!\n";
print "%\n";
print "% Please contact Barbara Beeton <bnb\@ams.org>\n";
print "% for corrections and omissions.\n";
print "\n";
print "\\hyphenation{\n";

# emit one output line per entry derived from each input line
while (my $line = <>) {
  print " $_\n" for hyphenation_entries($line);
}

# print trailer
print "}\n";
print "\n";
print "% EOF\n";


# hyphenation_entries(LINE)
#
# Convert one raw input line into the list of entries to be written
# into the \hyphenation block.
#
# LINE is a single line of the article, with or without its line
# terminator.  The return value is a (possibly empty) list of strings:
#
#   - lines not starting with \1 ... \6 or \tabalign, and lines which
#     have only whitespace before `&' after cleanup, yield no entry;
#   - \1 and \4 yield the third whitespace-separated field;
#   - \2 and \5 yield the third field, followed by the third field
#     combined with each comma-separated suffix of the fourth field;
#   - \3 and \6 yield the third field combined with each comma-separated
#     suffix of the fourth and fifth fields (the bare third field is
#     not emitted);
#   - everything else yields the text between the first and the second
#     `&'.
#
# A missing field is treated as an empty string, so a malformed line
# still yields the same (possibly empty) entry as before instead of
# triggering `uninitialized value' warnings.
sub hyphenation_entries {
  my ($line) = @_;

  # retain only lines starting with \1 ... \6 or \tabalign
  return unless $line =~ m/^\\(?:[123456]|tabalign)/;

  # remove the line terminator; unlike `chop', `chomp' does not eat the
  # last character of a final line that lacks a trailing newline
  chomp $line;
  # remove all TeX commands except \1 ... \6
  $line =~ s/\\[^123456\s{]+//g;
  # remove all paired { ... }; repeat until no pair is left so that
  # nested groups get unwrapped, too
  1 while $line =~ s/\{(.*?)\}/$1/g;
  # skip lines which now have only whitespace before `&'
  return if $line =~ m/^\s*&/;
  # remove comments
  $line =~ s/%.*//;
  # remove trailing whitespace
  $line =~ s/\s*$//;
  # remove trailing `*' (used as a marker in the document)
  $line =~ s/\*$//;

  # split at whitespace; only fields 0, 2, 3, and 4 are ever used
  my @field = split(' ', $line);
  my ($tag, $word, $first, $second) =
    map { defined $_ ? $_ : '' } @field[0, 2, 3, 4];

  if ($tag eq "\\1" || $tag eq "\\4") {
    return ($word);
  }
  if ($tag eq "\\2" || $tag eq "\\5") {
    # handle multiple suffixes separated by commata
    return ($word, map { $word . $_ } split(/,/, $first));
  }
  if ($tag eq "\\3" || $tag eq "\\6") {
    # handle multiple suffixes separated by commata
    return map { $word . $_ } split(/,/, "$first,$second");
  }

  # for `&', split at `&' with trailing whitespace
  my @column = split(/&\s*/, $line);
  return (defined $column[1] ? $column[1] : '');
}