Home | History | Annotate | Line # | Download | only in header-tools
reduce-headers revision 1.4
      1 #! /usr/bin/python2
      2 import os.path
      3 import sys
      4 import shlex
      5 import re
      6 import tempfile
      7 import copy
      8 
      9 from headerutils import *
     10 
# Dependency bookkeeping maps, mirrored from the other header-tools
# scripts (not populated by this particular tool).
requires = { }
provides = { }

# Headers which must never be removed: they are the canonical first
# includes of every GCC translation unit.
no_remove = [ "system.h", "coretypes.h", "config.h" , "bconfig.h", "backend.h" ]

# These targets are the ones which provide "coverage".  Typically, if any
# target is going to fail compilation, it's one of these.  This was determined
# during the initial runs of reduce-headers... On a full set of target builds,
# every failure which occured was triggered by one of these.  
# This list is used during target-list construction simply to put any of these
# *first* in the candidate list, increasing the probability that a failure is 
# found quickly.
target_priority = [
    "aarch64-linux-gnu",
    "arm-netbsdelf",
    "c6x-elf",
    "epiphany-elf",
    "hppa2.0-hpux10.1",
    "i686-mingw32crt",
    "i686-pc-msdosdjgpp",
    "mipsel-elf",
    "powerpc-eabisimaltivec",
    "rs6000-ibm-aix5.1.0",
    "sh-superh-elf",
    "sparc64-elf"
]


# Root of the target build tree (-t) and of the host build tree (-b).
target_dir = ""
build_dir = ""
# Macros to ignore for dependency purposes (-D options).
ignore_list = list()
target_builds = list()

# target tuple -> its build path; filled in by build_target_dict().
target_dict = { }
# normalized header name -> include-info tuple; cache used by get_header_info().
header_dict = { }
# Directories searched for the source copy of an include file.
search_path = [ ".", "../include", "../libcpp/include" ]

# header name -> number of times it was successfully removed, for the summary.
remove_count = { }
     49 
     50 
     51 # Given a header name, normalize it.  ie.  cp/cp-tree.h could be in gcc, while
     52 # the same header could be referenced from within the cp subdirectory as
     53 # just cp-tree.h
     54 # for now, just assume basenames are unique
     55 
def normalize_header (header):
  """Reduce a header path to its basename (basenames assumed unique)."""
  directory, base = os.path.split (header)
  return base
     58 
     59 
     60 # Adds a header file and its sub-includes to the global dictionary if they
     61 # aren't already there.  Specify s_path since different build directories may
     62 # append themselves on demand to the global list.
     63 # return entry for the specified header, knowing all sub entries are completed
     64 
def get_header_info (header, s_path):
  """Return the include-info tuple for HEADER, caching it and all of its
  sub-includes in the global header_dict.

  header : include name as written in the #include directive.
  s_path : list of directories to search for the header's source copy.

  Returns the iinfo tuple for HEADER, or empty_iinfo if it was not found
  anywhere (empty_iinfo presumably comes from headerutils' star import)."""
  global header_dict
  global empty_iinfo
  process_list = list ()
  location = ""
  bname = ""
  bname_iinfo = empty_iinfo
  # Find the first copy of the header along the search path.
  for path in s_path:
    if os.path.exists (path + "/" + header):
      location = path + "/" + header
      break

  if location:
    bname = normalize_header (location)
    if header_dict.get (bname):
      # Cache hit: verify the cached entry came from the same location,
      # comparing with any leading "./" stripped from both sides.
      bname_iinfo = header_dict[bname]
      loc2 = ii_path (bname_iinfo)+ "/" + bname
      if loc2[:2] == "./":
        loc2 = loc2[2:]
      if location[:2] == "./":
        location = location[2:]
      if loc2 != location:
        # Don't use the cache if it isnt the right one.
        bname_iinfo = process_ii_macro (location)
      return bname_iinfo

    bname_iinfo = process_ii_macro (location)
    header_dict[bname] = bname_iinfo
    # now decend into the include tree
    for i in ii_include_list (bname_iinfo):
      get_header_info (i, s_path)
  else:
    # if the file isnt in the source directories, look in the build and target
    # directories. If it is here, then aggregate all the versions.
    location = build_dir + "/gcc/" + header
    build_inc = target_inc = False
    if os.path.exists (location):
      build_inc = True
    for x in target_dict:
      location = target_dict[x] + "/gcc/" + header
      if os.path.exists (location):
        target_inc = True
        break

    if (build_inc or target_inc):
      bname = normalize_header(header)
      defines = set()
      consumes = set()
      incl = set()
      if build_inc:
        # Seed the sets from the host build's generated copy.
        iinfo = process_ii_macro (build_dir + "/gcc/" + header)
        defines = set (ii_macro_define (iinfo))
        consumes = set (ii_macro_consume (iinfo))
        incl = set (ii_include_list (iinfo))

      if (target_inc):
        # Merge in every target's generated copy of the header.
        for x in target_dict:
          location = target_dict[x] + "/gcc/" + header
          if os.path.exists (location):
            iinfo = process_ii_macro (location)
            defines.update (ii_macro_define (iinfo))
            consumes.update (ii_macro_consume (iinfo))
            incl.update (ii_include_list (iinfo))

      # Synthesize an iinfo tuple; the path field "build" marks the header
      # as generated rather than found in the source tree.
      bname_iinfo = (header, "build", list(incl), list(), list(consumes), list(defines), list(), list())

      header_dict[bname] = bname_iinfo
      # Recurse into the aggregated include list as well.
      for i in incl:
        get_header_info (i, s_path)

  return bname_iinfo
    136 
    137 
    138 # return a list of all headers brought in by this header
def all_headers (fname):
  """Return the normalized names of every header transitively brought in
  by FNAME, according to the cached entries in header_dict."""
  global header_dict
  root = header_dict.get (fname)
  if root is None:
    return list ()

  # Depth-first walk over the cached include lists.
  pending = list (ii_include_list (root))
  seen = list ()
  while pending:
    name = normalize_header (pending.pop ())
    if name in seen:
      continue
    seen.append (name)
    entry = header_dict.get (name)
    if entry:
      for sub in ii_include_list (entry):
        if normalize_header (sub) not in seen:
          pending.append (sub)

  return seen
    159 
    160 
    161 
    162 
    163 # Search bld_dir for all target tuples, confirm that they have a build path with
    164 # bld_dir/target-tuple/gcc, and build a dictionary of build paths indexed by
    165 # target tuple..
    166 
def build_target_dict (bld_dir, just_these):
  """Populate the global target_dict with target-tuple -> build-path.

  bld_dir    : directory containing one subdirectory per target tuple.
  just_these : if non-empty, restrict the scan to these target names;
               otherwise every entry of bld_dir containing a "-" is tried.

  Every candidate must have a <bld_dir>/<target>/gcc subdirectory; if any
  candidate lacks one, an error is printed and target_dict is emptied."""
  global target_dict
  # Start from a clean dictionary on every call.  (The original code
  # assigned a misspelled dead local, 'target_doct', so the global was
  # never actually reset here.)
  target_dict = { }
  error = False
  if os.path.exists (bld_dir):
    if just_these:
      ls = just_these
    else:
      ls = os.listdir(bld_dir)
    for t in ls:
      # Target tuples always contain a dash (e.g. "arm-elf").
      if t.find("-") != -1:
        target = t.strip()
        tpath = bld_dir + "/" + target
        if not os.path.exists (tpath + "/gcc"):
          print ("Error: gcc build directory for target " + t + " Does not exist: " + tpath + "/gcc")
          error = True
        else:
          target_dict[target] = tpath

  # Any failure invalidates the whole set of target builds.
  if error:
    target_dict = { }
    188 
def get_obj_name (src_file):
  """Map a .c or .cc source file name to its .o object file name.

  Returns "" when the suffix is not recognized.  Uses suffix slicing
  rather than str.replace(): replace() rewrites *every* occurrence, so a
  path such as "gcc.c-torture/x.c" would have been mangled to
  "gcc.o-torture/x.o"."""
  if src_file[-2:] == ".c":
    return src_file[:-2] + ".o"
  elif src_file[-3:] == ".cc":
    return src_file[:-3] + ".o"
  return ""
    195 
def target_obj_exists (target, obj_name):
  """Return True when OBJ_NAME already exists in TARGET's gcc build tree.

  Checks the subdirectory-qualified path first, then falls back to the
  bare basename in the gcc base directory."""
  global target_dict
  tpath = target_dict.get (target)
  if not tpath:
    return False
  candidate = tpath + "/gcc/" + obj_name
  if not os.path.exists (candidate):
    candidate = tpath + "/gcc/" + os.path.basename (obj_name)
  return os.path.exists (candidate)
    206  
    207 # Given a src file, return a list of targets which may build this file.
def find_targets (src_file):
  """Return (target, build-path) pairs for every target that can build
  SRC_FILE, with the failure-prone priority targets placed first."""
  global target_dict
  result = list()
  obj_name = get_obj_name (src_file)
  if not obj_name:
    print ("Error: " + src_file + " - Cannot determine object name.")
    return list()

  # Put the high priority targets which tend to trigger failures first,
  # so that a breaking target is usually found early.
  for tname in target_priority:
    if target_obj_exists (tname, obj_name):
      result.append ((tname, target_dict[tname]))

  # Then append every remaining target which builds this object.
  for tname in target_dict:
    if tname not in target_priority and target_obj_exists (tname, obj_name):
      result.append ((tname, target_dict[tname]))

  return result
    226 
    227 
def try_to_remove (src_file, h_list, verbose):
  """Attempt to delete each #include from SRC_FILE, rebuilding after every
  trial removal to confirm the file (and the target builds) still compile.

  src_file : source file to reduce; it is rewritten in place and the
             original is kept as src_file + ".bak".
  h_list   : if non-empty, only these normalized header names are
             candidates for removal.
  verbose  : an open log file object, or a false value for no logging.

  Returns a one-line summary string describing the outcome."""
  global target_dict
  global header_dict
  global build_dir

  # build from scratch each time
  header_dict = { }
  summary = ""
  rmcount = 0

  # macro name -> header/source believed to require it (for diagnostics).
  because = { }
  src_info = process_ii_macro_src (src_file)
  src_data = ii_src (src_info)
  if src_data:
    inclist = ii_include_list_non_cond (src_info)
    # work is done if there are no includes to check
    if not inclist:
      return src_file + ": No include files to attempt to remove"

    # work on the include list in reverse.
    inclist.reverse()

    # Get the target list 
    targ_list = list()
    targ_list = find_targets (src_file)

    # Also search the source file's own directory for its headers.
    spath = search_path
    if os.path.dirname (src_file):
      spath.append (os.path.dirname (src_file))

    hostbuild = True
    if src_file.find("config/") != -1:
      # config files dont usually build on the host
      hostbuild = False
      obn = get_obj_name (os.path.basename (src_file))
      if obn and os.path.exists (build_dir + "/gcc/" + obn):
        hostbuild = True
      if not target_dict:
        summary = src_file + ": Target builds are required for config files.  None found."
        print summary
        return summary
      if not targ_list:
        summary =src_file + ": Cannot find any targets which build this file."
        print summary
        return summary

    if hostbuild:
      # confirm it actually builds before we do anything
      print "Confirming source file builds"
      res = get_make_output (build_dir + "/gcc", "all")
      if res[0] != 0:
        message = "Error: " + src_file + " does not build currently."
        summary = src_file + " does not build on host."
        print message
        print res[1]
        if verbose:
          verbose.write (message + "\n")
          verbose.write (res[1]+ "\n")
        return summary

    # Macros the source itself consumes; seeded as "required by src_file".
    src_requires = set (ii_macro_consume (src_info))
    for macro in src_requires:
      because[macro] = src_file
    header_seen = list ()

    # Keep the pristine original; work on deep copies of the line list.
    os.rename (src_file, src_file + ".bak")
    src_orig = copy.deepcopy (src_data)
    src_tmp = copy.deepcopy (src_data)

    try:
      # process the includes from bottom to top.  This is because we know that
      # later includes have are known to be needed, so any dependency from this 
      # header is a true dependency
      for inc_file in inclist:
        inc_file_norm = normalize_header (inc_file)

        # Skip the mandatory headers, headers outside any -f restriction,
        # generated gt-*/gtype-* headers, and a source file's own header.
        if inc_file in no_remove:
          continue
        if len (h_list) != 0 and inc_file_norm not in h_list:
          continue
        if inc_file_norm[0:3] == "gt-":
          continue
        if inc_file_norm[0:6] == "gtype-":
          continue
        if inc_file_norm.replace(".h",".c") == os.path.basename(src_file):
          continue

        # Write the file out with this single #include line removed.
        lookfor = ii_src_line(src_info)[inc_file]
        src_tmp.remove (lookfor)
        message = "Trying " + src_file + " without " + inc_file
        print message
        if verbose:
          verbose.write (message + "\n")
        out = open(src_file, "w")
        for line in src_tmp:
          out.write (line)
        out.close()

        # keep becomes True as soon as any build or macro check fails.
        keep = False
        if hostbuild:
          res = get_make_output (build_dir + "/gcc", "all")
        else:
          res = (0, "")

        rc = res[0]
        message = "Passed Host build"
        if (rc != 0):
          # host build failed
          message  = "Compilation failed:\n";
          keep = True
        else:
          if targ_list:
            # Build just the object file if the first target has one,
            # otherwise fall back to all-gcc, across all targets in parallel.
            objfile = get_obj_name (src_file)
            t1 = targ_list[0]
            if objfile and os.path.exists(t1[1] +"/gcc/"+objfile):
              res = get_make_output_parallel (targ_list, objfile, 0)
            else:
              res = get_make_output_parallel (targ_list, "all-gcc", 0)
            rc = res[0]
            if rc != 0:
              message = "Compilation failed on TARGET : " + res[2]
              keep = True
            else:
              message = "Passed host and target builds"

        if keep:
          print message + "\n"

        # On failure, append the build output to per-header and per-source
        # log files for later inspection.
        if (rc != 0):
          if verbose:
            verbose.write (message + "\n");
            verbose.write (res[1])
            verbose.write ("\n");
            if os.path.exists (inc_file):
              ilog = open(inc_file+".log","a")
              ilog.write (message + " for " + src_file + ":\n\n");
              ilog.write ("============================================\n");
              ilog.write (res[1])
              ilog.write ("\n");
              ilog.close()
            if os.path.exists (src_file):
              ilog = open(src_file+".log","a")
              ilog.write (message + " for " +inc_file + ":\n\n");
              ilog.write ("============================================\n");
              ilog.write (res[1])
              ilog.write ("\n");
              ilog.close()

        # Given a sequence where :
        # #include "tm.h"
        # #include "target.h"  // includes tm.h

        # target.h was required, and when attempting to remove tm.h we'd see that
        # all the macro defintions are "required" since they all look like:
        # #ifndef HAVE_blah
        # #define HAVE_blah
        # endif

        # when target.h was found to be required, tm.h will be tagged as included.
        # so when we get this far, we know we dont have to check the macros for
        # tm.h since we know it is already been included.

        if inc_file_norm not in header_seen:
          iinfo = get_header_info (inc_file, spath)
          newlist = all_headers (inc_file_norm)
          # Generated ("build") headers vary per target; without target
          # builds to verify against, never remove them.
          if ii_path(iinfo) == "build" and not target_dict:
            keep = True
            text = message + " : Will not remove a build file without some targets."
            print text
            ilog = open(src_file+".log","a")
            ilog.write (text +"\n")
            ilog.write ("============================================\n");
            ilog.close()
            ilog = open("reduce-headers-kept.log","a")
            ilog.write (src_file + " " + text +"\n")
            ilog.close()
        else:
          newlist = list()
        if not keep and inc_file_norm not in header_seen:
          # now look for any macro requirements.
          for h in newlist:
            if not h in header_seen:
              if header_dict.get(h):
                defined = ii_macro_define (header_dict[h])
                for dep in defined:
                  # A macro this tree defines is consumed by the source:
                  # the include must stay even though the builds passed.
                  if dep in src_requires and dep not in ignore_list:
                    keep = True;
                    text = message + ", but must keep " + inc_file + " because it provides " + dep 
                    if because.get(dep) != None:
                      text = text + " Possibly required by " + because[dep]
                    print text
                    ilog = open(inc_file+".log","a")
                    ilog.write (because[dep]+": Requires [dep] in "+src_file+"\n")
                    ilog.write ("============================================\n");
                    ilog.close()
                    ilog = open(src_file+".log","a")
                    ilog.write (text +"\n")
                    ilog.write ("============================================\n");
                    ilog.close()
                    ilog = open("reduce-headers-kept.log","a")
                    ilog.write (src_file + " " + text +"\n")
                    ilog.close()
                    if verbose:
                      verbose.write (text + "\n")

        if keep:
          # add all headers 'consumes' to src_requires list, and mark as seen
          for h in newlist:
            if not h in header_seen:
              header_seen.append (h)
              if header_dict.get(h):
                consume = ii_macro_consume (header_dict[h])
                for dep in consume:
                  if dep not in src_requires:
                    src_requires.add (dep)
                    if because.get(dep) == None:
                      because[dep] = inc_file

          # Revert the trial removal: start again from the last good copy.
          src_tmp = copy.deepcopy (src_data)
        else:
          # Removal succeeded: commit it and bump the global statistics.
          print message + "  --> removing " + inc_file + "\n"
          rmcount += 1
          if verbose:
            verbose.write (message + "  --> removing " + inc_file + "\n")
          if remove_count.get(inc_file) == None:
            remove_count[inc_file] = 1
          else:
            remove_count[inc_file] += 1
          src_data = copy.deepcopy (src_tmp)
    except:
      # Restore the original file on any interruption, then re-raise.
      print "Interuption: restoring original file"
      out = open(src_file, "w")
      for line in src_orig:
        out.write (line)
      out.close()
      raise

    # copy current version, since it is the "right" one now.
    out = open(src_file, "w")
    for line in src_data:
      out.write (line)
    out.close()

    # Try a final host bootstrap build to make sure everything is kosher.
    if hostbuild:
      res = get_make_output (build_dir, "all")
      rc = res[0]
      if (rc != 0):
        # host build failed! return to original version
        print "Error: " + src_file + " Failed to bootstrap at end!!! restoring."
        print "        Bad version at " + src_file + ".bad"
        os.rename (src_file, src_file + ".bad")
        out = open(src_file, "w")
        for line in src_orig:
          out.write (line)
        out.close()
        return src_file + ": failed to build after reduction.  Restored original"

    if src_data == src_orig:
      summary = src_file + ": No change."
    else:
      summary = src_file + ": Reduction performed, "+str(rmcount)+" includes removed."
  print summary
  return summary
    492 
only_h = list ()
ignore_cond = False

usage = False
src = list()
only_targs = list ()
# Parse the command line: single-letter options take their value glued on
# (e.g. -b/path), anything else is a source file to reduce.
for x in sys.argv[1:]:
  if x[0:2] == "-b":
    build_dir = x[2:]
  elif x[0:2] == "-f":
    fn = normalize_header (x[2:])
    if fn not in only_h:
      only_h.append (fn)
  elif x[0:2] == "-h":
    usage = True
  elif x[0:2] == "-d":
    ignore_cond = True
  elif x[0:2] == "-D":
    ignore_list.append(x[2:])
  elif x[0:2] == "-T":
    only_targs.append(x[2:])
  elif x[0:2] == "-t":
    target_dir = x[2:]
  elif x[0] == "-":
    print ("Error:  Unrecognized option " + x)
    # Fix: this was "usgae = True" (typo), so an unrecognized option
    # printed the error but never triggered the usage text / exit.
    usage = True
  else:
    if not os.path.exists (x):
      print ("Error: specified file " + x + " does not exist.")
      usage = True
    else:
      src.append (x)
    525 
# Build the target-tuple dictionary from the -t directory (restricted to
# any -T targets given).
if target_dir:
  build_target_dict (target_dir, only_targs)

# At least one of a host build dir (-b) or target dir (-t) is required,
# and whichever was given must actually exist.
if build_dir == "" and target_dir == "":
  print "Error: Must specify a build directory, and/or a target directory."
  usage = True

if build_dir and not os.path.exists (build_dir):
    print "Error: specified build directory does not exist : " + build_dir
    usage = True

if target_dir and not os.path.exists (target_dir):
    print "Error: specified target directory does not exist : " + target_dir
    usage = True
    540 
# Print the help text (reached via -h or any command line error) and exit.
if usage:
  print "Attempts to remove extraneous include files from source files."
  print " "
  print "Should be run from the main gcc source directory, and works on a target"
  print "directory, as we attempt to make the 'all' target."
  print " "
  print "By default, gcc-reorder-includes is run on each file before attempting"
  print "to remove includes. this removes duplicates and puts some headers in a"
  print "canonical ordering"
  print " "
  print "The build directory should be ready to compile via make. Time is saved"
  print "if the build is already complete, so that only changes need to be built."
  print " "
  print "Usage: [options] file1.c [file2.c] ... [filen.c]"
  print "      -bdir    : the root build directory to attempt buiding .o files."
  print "      -tdir    : the target build directory"
  print "      -d       : Ignore conditional macro dependencies."
  print " "
  print "      -Dmacro  : Ignore a specific macro for dependencies"
  print "      -Ttarget : Only consider target in target directory."
  print "      -fheader : Specifies a specific .h file to be considered."
  print " "
  print "      -D, -T, and -f can be specified mulitple times and are aggregated."
  print " "
  print "  The original file will be in filen.bak"
  print " "
  sys.exit (0)
    568  
# Announce any -f restriction on the headers being considered.
if only_h:
  print "Attempting to remove only these files:"
  for x in only_h:
    print x
  print " "

# Detailed per-attempt log, passed to try_to_remove as its verbose stream.
logfile = open("reduce-headers.log","w")

# Reduce each source file, appending a one-line summary per file to the
# cumulative .sum log (reopened each time so progress survives a crash).
for x in src:
  msg = try_to_remove (x, only_h, logfile)
  ilog = open("reduce-headers.sum","a")
  ilog.write (msg + "\n")
  ilog.close()

# Finish with per-header removal totals in both logs and on stdout.
ilog = open("reduce-headers.sum","a")
ilog.write ("===============================================================\n")
for x in remove_count:
  msg = x + ": Removed " + str(remove_count[x]) + " times."
  print msg
  logfile.write (msg + "\n")
  ilog.write (msg + "\n")
    591 
    592 
    593 
    594 
    595