Home | History | Annotate | Line # | Download | only in tilepro
      1   1.1  mrg /* Multiply table generator for tile.
      2  1.10  mrg    Copyright (C) 2011-2022 Free Software Foundation, Inc.
      3   1.1  mrg    Contributed by Walter Lee (walt (at) tilera.com)
      4   1.1  mrg 
      5   1.1  mrg    This file is part of GCC.
      6   1.1  mrg 
      7   1.1  mrg    GCC is free software; you can redistribute it and/or modify it
      8   1.1  mrg    under the terms of the GNU General Public License as published
      9   1.1  mrg    by the Free Software Foundation; either version 3, or (at your
     10   1.1  mrg    option) any later version.
     11   1.1  mrg 
     12   1.1  mrg    GCC is distributed in the hope that it will be useful, but WITHOUT
     13   1.1  mrg    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     14   1.1  mrg    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     15   1.1  mrg    License for more details.
     16   1.1  mrg 
     17   1.1  mrg    You should have received a copy of the GNU General Public License
     18   1.1  mrg    along with GCC; see the file COPYING3.  If not see
     19   1.1  mrg    <http://www.gnu.org/licenses/>.  */
     20   1.1  mrg 
     21   1.1  mrg /* This program creates a table used to compile multiply by constant
     22   1.1  mrg    efficiently.
     23   1.1  mrg 
     24   1.1  mrg    This program should be compiled by a c++ compiler.  If it's
     25   1.4  mrg    compiled with -DTILEPRO, it generates the multiply table for
     26   1.1  mrg    TILEPro; otherwise it generates the multiply table for TILE-Gx.
     27   1.1  mrg    Running the program produces the table in stdout.
     28   1.1  mrg 
     29   1.1  mrg    The program works by generating every possible combination of up to
     30   1.1  mrg    MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left
     31   1.1  mrg    shift) and computing the multiplier computed by those instructions.
     32   1.1  mrg    For example,
     33   1.1  mrg 
     34   1.1  mrg    s2a r2,r1,r1
     35   1.1  mrg    s2a r3,r2,r2
     36   1.1  mrg 
     37   1.1  mrg    multiplies r1 by 25.
     38   1.1  mrg 
     39   1.1  mrg    There are usually multiple instruction sequences to multiply by a
     40   1.1  mrg    given constant. This program keeps only the cheapest one.
     41   1.1  mrg    "Cheapest" is defined first by the minimum theoretical schedule
     42   1.1  mrg    length, and if those are equal then by the number of instructions,
     43   1.1  mrg    and if those are equal then by which instructions we "prefer"
     44   1.1  mrg    (e.g. because we think one might take infinitesimally less power
     45   1.1  mrg    than another, or simply because we arbitrarily pick one to be more
     46   1.1  mrg    canonical).
     47   1.1  mrg 
     48   1.1  mrg    Once this program has determined the best instruction sequence for
     49   1.1  mrg    each multiplier, it emits them in a table sorted by the multiplier
     50   1.1  mrg    so the user can binary-search it to look for a match.  The table is
     51   1.1  mrg    pruned based on various criteria to keep its sizes reasonable.  */
     52   1.1  mrg 
     53   1.1  mrg #include <stdio.h>
     54   1.1  mrg #include <stdlib.h>
     55   1.1  mrg #include <assert.h>
     56   1.1  mrg #include <string.h>
     57   1.1  mrg #define __STDC_LIMIT_MACROS
     58   1.1  mrg #include <stdint.h>
     59   1.1  mrg 
     60   1.1  mrg #include <map>
     61   1.1  mrg 
     62   1.1  mrg #ifdef TILEPRO
     63   1.1  mrg 
     64   1.1  mrg /* The string representing the architecture.  */
     65   1.1  mrg #define ARCH "tilepro"
     66   1.1  mrg 
     67   1.1  mrg /* The type for the multiplication.  */
     68   1.1  mrg typedef int MUL_TYPE;
     69   1.1  mrg 
     70   1.1  mrg #else
     71   1.1  mrg 
     72   1.1  mrg /* The string representing the architecture.  */
     73   1.1  mrg #define ARCH "tilegx"
     74   1.1  mrg 
     75   1.1  mrg /* The type for the multiplication.  */
     76   1.1  mrg typedef long long MUL_TYPE;
     77   1.1  mrg 
     78   1.1  mrg #endif
     79   1.1  mrg 
     80   1.1  mrg /* Longest instruction sequence this will produce. With the current
     81   1.1  mrg    stupid algorithm runtime grows very quickly with this number.  */
     82   1.1  mrg #define MAX_INSTRUCTIONS 4
     83   1.1  mrg 
     84   1.1  mrg /* Maximum number of subexpressions in the expression DAG being
     85   1.1  mrg    generated.  This is the same as the number of instructions, except
     86   1.1  mrg    that zero and the original register we'd like to multiply by a
     87   1.1  mrg    constant are also thrown into the mix.  */
     88   1.1  mrg #define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS)
     89   1.1  mrg 
     90   1.1  mrg #define MIN(x, y)  ((x) <= (y) ? (x) : (y))
     91   1.1  mrg #define MAX(x, y)  ((x) >= (y) ? (x) : (y))
     92   1.1  mrg 
     93   1.1  mrg /* For this program a unary op is one which has only one nonconstant
     94   1.1  mrg    operand.  So shift left by 5 is considered unary.  */
     95   1.1  mrg typedef MUL_TYPE (*unary_op_func) (MUL_TYPE);
     96   1.1  mrg typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE);
     97   1.1  mrg 
     98   1.1  mrg /* This describes an operation like 'add two registers' or 'left-shift
     99   1.1  mrg    by 7'.
    100   1.1  mrg 
    101   1.1  mrg    We call something a 'unary' operator if it only takes in one
    102   1.1  mrg    register as input, even though it might have an implicit second
    103   1.1  mrg    constant operand.  Currently this is used for left-shift by
    104   1.1  mrg    constant.  */
    105   1.1  mrg class Operator
    106   1.1  mrg {
    107   1.1  mrg public:
    108   1.1  mrg   /* Construct for a binary operator.  */
    109   1.1  mrg   Operator (const char *pattern, const char *name, binary_op_func func,
    110   1.1  mrg 	    int cost)
    111   1.1  mrg     : m_pattern (pattern), m_name (name), m_top_index (-1),
    112   1.1  mrg       m_unary_func (0), m_binary_func (func), m_cost (cost),
    113   1.1  mrg       m_rhs_if_unary (0)
    114   1.1  mrg   {
    115   1.1  mrg   }
    116   1.1  mrg 
    117   1.1  mrg   /* Construct for a unary operator.  */
    118   1.1  mrg   Operator (const char *pattern, const char *name, unary_op_func func,
    119   1.1  mrg 	    int rhs_if_unary, int cost)
    120   1.1  mrg     : m_pattern (pattern), m_name (name), m_top_index (-1),
    121   1.1  mrg       m_unary_func (func), m_binary_func (0), m_cost (cost),
    122   1.1  mrg       m_rhs_if_unary (rhs_if_unary)
    123   1.1  mrg   {
    124   1.1  mrg   }
    125   1.1  mrg 
    126   1.1  mrg   bool is_unary () const
    127   1.1  mrg   {
    128   1.1  mrg     return m_binary_func == NULL;
    129   1.1  mrg   }
    130   1.1  mrg 
    131   1.1  mrg   /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3.  */
    132   1.1  mrg   const char *m_pattern;
    133   1.1  mrg 
    134   1.1  mrg   /* Name of the opcode for this operation, e.g. add.  */
    135   1.1  mrg   const char *m_name;
    136   1.1  mrg 
    137   1.1  mrg   /* We don't have enough bits in our output representation to store
    138   1.1  mrg      the original insn_code value, so we store a compressed form
    139   1.1  mrg      instead.  These values are decoded back into insn_code via the
    140   1.1  mrg      machine-generated multiply_insn_seq_decode_opcode lookup
    141   1.1  mrg      table.  */
    142   1.1  mrg   int m_top_index;
    143   1.1  mrg 
    144   1.1  mrg   /* Unary operator to apply, or NULL if this is a binary operator.  */
    145   1.1  mrg   unary_op_func m_unary_func;
    146   1.1  mrg 
    147   1.1  mrg   /* Binary operator to apply, or NULL if this is a unary operator.  */
    148   1.1  mrg   binary_op_func m_binary_func;
    149   1.1  mrg 
    150   1.1  mrg   /* Function of how expensive we consider this operator. Higher is
    151   1.1  mrg      worse.  */
    152   1.1  mrg   int m_cost;
    153   1.1  mrg 
    154   1.1  mrg   /* the RHS value to write into the C file if unary; used for shift
    155   1.1  mrg      count.  */
    156   1.1  mrg   int m_rhs_if_unary;
    157   1.1  mrg };
    158   1.1  mrg 
    159   1.1  mrg 
    160   1.1  mrg /* An entry in an expression DAG.  */
    161   1.1  mrg class Expr
    162   1.1  mrg {
    163   1.1  mrg public:
    164   1.1  mrg   Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0),
    165   1.1  mrg     m_critical_path_length (0)
    166   1.1  mrg   {
    167   1.1  mrg   }
    168   1.1  mrg 
    169   1.1  mrg   /* The operator being applied to the operands.  */
    170   1.1  mrg   const Operator *m_op;
    171   1.1  mrg 
    172   1.1  mrg   /* The index of the left-hand operand in the array of subexpressions
    173   1.1  mrg      already computed.  */
    174   1.1  mrg   int m_lhs;
    175   1.1  mrg 
    176   1.1  mrg   /* For binary ops ,this is the index of the left-hand operand in the
    177   1.1  mrg      array of subexpressions already computed. For unary ops, it is
    178   1.1  mrg      specific to the op (e.g. it might hold a constant shift
    179   1.1  mrg      count).  */
    180   1.1  mrg   int m_rhs;
    181   1.1  mrg 
    182   1.1  mrg   /* The multiplier produced by this expression tree. For example, for
    183   1.1  mrg      the tree ((x << 5) + x), the value would be 33.  */
    184   1.1  mrg   MUL_TYPE m_produced_val;
    185   1.1  mrg 
    186   1.1  mrg   /* How far is this expression from the root, i.e. how many cycles
    187   1.1  mrg      minimum will it take to compute this?  */
    188   1.1  mrg   int m_critical_path_length;
    189   1.1  mrg };
    190   1.1  mrg 
    191   1.1  mrg 
    192   1.1  mrg /* Make function pointers for the various linear operators we can
    193   1.1  mrg    apply to compute a multiplicative value.  */
    194   1.1  mrg 
    195   1.1  mrg static MUL_TYPE
    196   1.1  mrg add (MUL_TYPE n1, MUL_TYPE n2)
    197   1.1  mrg {
    198   1.1  mrg   return n1 + n2;
    199   1.1  mrg }
    200   1.1  mrg 
    201   1.1  mrg static MUL_TYPE
    202   1.1  mrg sub (MUL_TYPE n1, MUL_TYPE n2)
    203   1.1  mrg {
    204   1.1  mrg   return n1 - n2;
    205   1.1  mrg }
    206   1.1  mrg 
    207   1.1  mrg static MUL_TYPE
    208   1.1  mrg s1a (MUL_TYPE n1, MUL_TYPE n2)
    209   1.1  mrg {
    210   1.1  mrg   return n1 * 2 + n2;
    211   1.1  mrg }
    212   1.1  mrg 
    213   1.1  mrg static MUL_TYPE
    214   1.1  mrg s2a (MUL_TYPE n1, MUL_TYPE n2)
    215   1.1  mrg {
    216   1.1  mrg   return n1 * 4 + n2;
    217   1.1  mrg }
    218   1.1  mrg 
    219   1.1  mrg static MUL_TYPE
    220   1.1  mrg s3a (MUL_TYPE n1, MUL_TYPE n2)
    221   1.1  mrg {
    222   1.1  mrg   return n1 * 8 + n2;
    223   1.1  mrg }
    224   1.1  mrg 
    225   1.1  mrg #define SHIFT(count)                            \
    226   1.1  mrg static MUL_TYPE                                 \
    227   1.1  mrg shift##count(MUL_TYPE n)                        \
    228   1.1  mrg {                                               \
    229   1.1  mrg   return n << (count);                          \
    230   1.1  mrg }
    231   1.1  mrg 
    232   1.1  mrg SHIFT (1);
    233   1.1  mrg SHIFT (2);
    234   1.1  mrg SHIFT (3);
    235   1.1  mrg SHIFT (4);
    236   1.1  mrg SHIFT (5);
    237   1.1  mrg SHIFT (6);
    238   1.1  mrg SHIFT (7);
    239   1.1  mrg SHIFT (8);
    240   1.1  mrg SHIFT (9);
    241   1.1  mrg SHIFT (10);
    242   1.1  mrg SHIFT (11);
    243   1.1  mrg SHIFT (12);
    244   1.1  mrg SHIFT (13);
    245   1.1  mrg SHIFT (14);
    246   1.1  mrg SHIFT (15);
    247   1.1  mrg SHIFT (16);
    248   1.1  mrg SHIFT (17);
    249   1.1  mrg SHIFT (18);
    250   1.1  mrg SHIFT (19);
    251   1.1  mrg SHIFT (20);
    252   1.1  mrg SHIFT (21);
    253   1.1  mrg SHIFT (22);
    254   1.1  mrg SHIFT (23);
    255   1.1  mrg SHIFT (24);
    256   1.1  mrg SHIFT (25);
    257   1.1  mrg SHIFT (26);
    258   1.1  mrg SHIFT (27);
    259   1.1  mrg SHIFT (28);
    260   1.1  mrg SHIFT (29);
    261   1.1  mrg SHIFT (30);
    262   1.1  mrg SHIFT (31);
    263   1.1  mrg #ifndef TILEPRO
    264   1.1  mrg SHIFT (32);
    265   1.1  mrg SHIFT (33);
    266   1.1  mrg SHIFT (34);
    267   1.1  mrg SHIFT (35);
    268   1.1  mrg SHIFT (36);
    269   1.1  mrg SHIFT (37);
    270   1.1  mrg SHIFT (38);
    271   1.1  mrg SHIFT (39);
    272   1.1  mrg SHIFT (40);
    273   1.1  mrg SHIFT (41);
    274   1.1  mrg SHIFT (42);
    275   1.1  mrg SHIFT (43);
    276   1.1  mrg SHIFT (44);
    277   1.1  mrg SHIFT (45);
    278   1.1  mrg SHIFT (46);
    279   1.1  mrg SHIFT (47);
    280   1.1  mrg SHIFT (48);
    281   1.1  mrg SHIFT (49);
    282   1.1  mrg SHIFT (50);
    283   1.1  mrg SHIFT (51);
    284   1.1  mrg SHIFT (52);
    285   1.1  mrg SHIFT (53);
    286   1.1  mrg SHIFT (54);
    287   1.1  mrg SHIFT (55);
    288   1.1  mrg SHIFT (56);
    289   1.1  mrg SHIFT (57);
    290   1.1  mrg SHIFT (58);
    291   1.1  mrg SHIFT (59);
    292   1.1  mrg SHIFT (60);
    293   1.1  mrg SHIFT (61);
    294   1.1  mrg SHIFT (62);
    295   1.1  mrg SHIFT (63);
    296   1.1  mrg #endif
    297   1.1  mrg 
    298   1.1  mrg #ifdef TILEPRO
    299   1.1  mrg static Operator ops[] = {
    300   1.1  mrg   Operator ("CODE_FOR_addsi3", "add", add, 1040),
    301   1.1  mrg   Operator ("CODE_FOR_subsi3", "sub", sub, 1041),
    302   1.1  mrg   Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042),
    303   1.1  mrg   Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043),
    304   1.1  mrg   Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044),
    305   1.1  mrg   /* Note: shl by 1 is not necessary, since adding a value to itself
    306   1.1  mrg      produces the same result. But the architecture team thinks
    307   1.1  mrg      left-shift may use slightly less power, so we might as well
    308   1.1  mrg      prefer it.  */
    309   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001),
    310   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002),
    311   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003),
    312   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004),
    313   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005),
    314   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006),
    315   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007),
    316   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008),
    317   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009),
    318   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010),
    319   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011),
    320   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012),
    321   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013),
    322   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014),
    323   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015),
    324   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016),
    325   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017),
    326   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018),
    327   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019),
    328   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020),
    329   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021),
    330   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022),
    331   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023),
    332   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024),
    333   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025),
    334   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026),
    335   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027),
    336   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028),
    337   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029),
    338   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030),
    339   1.1  mrg   Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031)
    340   1.1  mrg };
    341   1.1  mrg #else
    342   1.1  mrg static Operator ops[] = {
    343   1.1  mrg   Operator ("CODE_FOR_adddi3", "add", add, 1070),
    344   1.1  mrg   Operator ("CODE_FOR_subdi3", "sub", sub, 1071),
    345   1.1  mrg   Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072),
    346   1.1  mrg   Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073),
    347   1.1  mrg   Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074),
    348   1.1  mrg   // Note: shl by 1 is not necessary, since adding a value to itself
    349   1.1  mrg   // produces the same result. But the architecture team thinks left-shift
    350   1.1  mrg   // may use slightly less power, so we might as well prefer it.
    351   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001),
    352   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002),
    353   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003),
    354   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004),
    355   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005),
    356   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006),
    357   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007),
    358   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008),
    359   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009),
    360   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010),
    361   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011),
    362   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012),
    363   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013),
    364   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014),
    365   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015),
    366   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016),
    367   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017),
    368   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018),
    369   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019),
    370   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020),
    371   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021),
    372   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022),
    373   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023),
    374   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024),
    375   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025),
    376   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026),
    377   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027),
    378   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028),
    379   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029),
    380   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030),
    381   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031),
    382   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032),
    383   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033),
    384   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034),
    385   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035),
    386   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036),
    387   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037),
    388   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038),
    389   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039),
    390   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040),
    391   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041),
    392   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042),
    393   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043),
    394   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044),
    395   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045),
    396   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046),
    397   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047),
    398   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048),
    399   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049),
    400   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050),
    401   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051),
    402   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052),
    403   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053),
    404   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054),
    405   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055),
    406   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056),
    407   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057),
    408   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058),
    409   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059),
    410   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060),
    411   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061),
    412   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062),
    413   1.1  mrg   Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063)
    414   1.1  mrg };
    415   1.1  mrg #endif
    416   1.1  mrg 
    417   1.1  mrg /* An ExpressionTree is an expression DAG.  */
    418   1.1  mrg class ExpressionTree
    419   1.1  mrg {
    420   1.1  mrg public:
    421   1.1  mrg   ExpressionTree () : m_num_vals (2)
    422   1.1  mrg   {
    423   1.1  mrg     m_exprs[0].m_produced_val = 0;
    424   1.1  mrg     m_exprs[1].m_produced_val = 1;
    425   1.1  mrg   }
    426   1.1  mrg 
    427   1.1  mrg   void copy_technique_from (ExpressionTree * other)
    428   1.1  mrg   {
    429   1.1  mrg     /* TODO: write this; other can compute the same products with less
    430   1.1  mrg        cost.  We update this ExpressionTree object because some int is
    431   1.1  mrg        already mapped to it.  */
    432   1.1  mrg   }
    433   1.1  mrg 
    434   1.1  mrg   int m_num_vals;
    435   1.1  mrg   Expr m_exprs[MAX_SUBEXPRS];
    436   1.1  mrg 
    437   1.1  mrg   int cost () const
    438   1.1  mrg   {
    439   1.1  mrg     int cost = 0;
    440   1.1  mrg     for (int j = 2; j < m_num_vals; j++)
    441   1.1  mrg         cost += m_exprs[j].m_op->m_cost;
    442   1.1  mrg       return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000;
    443   1.1  mrg   }
    444   1.1  mrg };
    445   1.1  mrg 
    446   1.1  mrg 
    447   1.1  mrg typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap;
    448   1.1  mrg 
    449   1.1  mrg 
    450   1.1  mrg static void
    451   1.1  mrg find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution)
    452   1.1  mrg {
    453   1.1  mrg   /* Don't look more if we can't add any new values to the expression
    454   1.1  mrg      tree.  */
    455   1.1  mrg   const int num_vals = s.m_num_vals;
    456   1.1  mrg   if (num_vals == MAX_SUBEXPRS)
    457   1.1  mrg     return;
    458   1.1  mrg 
    459   1.1  mrg   /* Grow array to make room for new values added as we loop.  */
    460   1.1  mrg   s.m_num_vals = num_vals + 1;
    461   1.1  mrg 
    462   1.1  mrg   const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op;
    463   1.1  mrg   const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1;
    464   1.1  mrg 
    465   1.1  mrg   for (size_t f = 0; f < sizeof ops / sizeof ops[0]; f++)
    466   1.1  mrg     {
    467   1.1  mrg       const Operator *const op = &ops[f];
    468   1.1  mrg 
    469   1.1  mrg       for (int i = 0; i < num_vals; i++)
    470   1.1  mrg 	{
    471   1.1  mrg 	  /* Only allow zero as the first operand to sub, otherwise
    472   1.1  mrg 	     it is useless.  */
    473   1.1  mrg 	  if (i == 0 && op->m_binary_func != sub)
    474   1.1  mrg 	    continue;
    475   1.1  mrg 
    476   1.1  mrg 	  /* Unary ops don't actually use the second operand, so as a
    477   1.1  mrg 	     big hack we trick it into only looping once in the inner
    478   1.1  mrg 	     loop.  */
    479   1.1  mrg 	  const int j_end = op->is_unary () ? 2 : num_vals;
    480   1.1  mrg 
    481   1.1  mrg 	  /* We never allow zero as the second operand, as it is
    482   1.1  mrg 	     always useless.  */
    483   1.1  mrg 	  for (int j = 1; j < j_end; j++)
    484   1.1  mrg 	    {
    485   1.1  mrg 	      /* Does this expression use the immediately previous
    486   1.1  mrg 		 expression?  */
    487   1.1  mrg 	      const bool uses_prev_value =
    488   1.1  mrg 		(i == num_vals - 1
    489   1.1  mrg 		 || (!op->is_unary () && j == num_vals - 1));
    490   1.1  mrg 
    491   1.1  mrg 	      if (!uses_prev_value)
    492   1.1  mrg 		{
    493   1.1  mrg 		  /* For search efficiency, prune redundant
    494   1.1  mrg 		     instruction orderings.
    495   1.1  mrg 
    496   1.1  mrg 		     This op does not take the immediately preceding
    497   1.1  mrg 		     value as input, which means we could have done it
    498   1.1  mrg 		     in the previous slot. If its opcode is less than
    499   1.1  mrg 		     the previous instruction's, we'll declare this
    500   1.1  mrg 		     instruction order non-canonical and not pursue
    501   1.1  mrg 		     this branch of the search.  */
    502   1.1  mrg 		  if (op->m_top_index < prev_top_index)
    503   1.1  mrg 		    continue;
    504   1.1  mrg 		}
    505   1.1  mrg 
    506   1.1  mrg 	      MUL_TYPE n;
    507   1.1  mrg 	      if (op->is_unary ())
    508   1.1  mrg 		{
    509   1.1  mrg 		  n = op->m_unary_func (s.m_exprs[i].m_produced_val);
    510   1.1  mrg 		}
    511   1.1  mrg 	      else
    512   1.1  mrg 		{
    513   1.1  mrg 		  n = op->m_binary_func (s.m_exprs[i].m_produced_val,
    514   1.1  mrg 					 s.m_exprs[j].m_produced_val);
    515   1.1  mrg 		}
    516   1.1  mrg 
    517   1.1  mrg 	      bool duplicate = false;
    518   1.1  mrg 	      for (int k = 0; k < num_vals; k++)
    519   1.1  mrg 		if (n == s.m_exprs[j].m_produced_val)
    520   1.1  mrg 		  {
    521   1.1  mrg 		    duplicate = true;
    522   1.1  mrg 		    break;
    523   1.1  mrg 		  }
    524   1.1  mrg 
    525   1.1  mrg 	      if (duplicate)
    526   1.1  mrg 		continue;
    527   1.1  mrg 
    528   1.1  mrg 	      /* See if we found the best solution for n.  */
    529   1.1  mrg 	      Expr *e = &s.m_exprs[num_vals];
    530   1.1  mrg 	      e->m_op = op;
    531   1.1  mrg 	      e->m_lhs = i;
    532   1.1  mrg 	      e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j;
    533   1.1  mrg 	      e->m_produced_val = n;
    534   1.1  mrg 	      e->m_critical_path_length =
    535   1.1  mrg 		1 + MAX (s.m_exprs[i].m_critical_path_length,
    536   1.1  mrg 			 s.m_exprs[j].m_critical_path_length);
    537   1.1  mrg 
    538   1.1  mrg 	      ExpressionTreeMap::iterator best (best_solution.find (n));
    539   1.1  mrg 	      if (best == best_solution.end ()
    540   1.1  mrg 		  || (*best).second->cost () > s.cost ())
    541   1.1  mrg 		best_solution[n] = new ExpressionTree (s);
    542   1.1  mrg 
    543   1.1  mrg 	      /* Recurse and find more.  */
    544   1.1  mrg 	      find_sequences (s, best_solution);
    545   1.1  mrg 	    }
    546   1.1  mrg 	}
    547   1.1  mrg     }
    548   1.1  mrg 
    549   1.1  mrg   /* Restore old size.  */
    550   1.1  mrg   s.m_num_vals = num_vals;
    551   1.1  mrg }
    552   1.1  mrg 
    553   1.1  mrg 
    554   1.1  mrg /* For each insn_code used by an operator, choose a compact number so
    555   1.1  mrg    it takes less space in the output data structure. This prints out a
    556   1.1  mrg    lookup table used to map the compactified number back to an
    557   1.1  mrg    insn_code.  */
    558   1.1  mrg static void
    559   1.1  mrg create_insn_code_compression_table ()
    560   1.1  mrg {
    561   1.1  mrg   int next_index = 1;
    562   1.1  mrg 
    563   1.1  mrg   /* Entry 0 must hold CODE_FOR_nothing to mean "end of array".  */
    564   1.1  mrg   printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n"
    565   1.1  mrg 	  "  CODE_FOR_nothing /* must be first */ ", ARCH);
    566   1.1  mrg 
    567   1.1  mrg   for (size_t i = 0; i < sizeof ops / sizeof ops[0]; i++)
    568   1.1  mrg     {
    569   1.1  mrg       Operator *op = &ops[i];
    570   1.1  mrg       int index = -1;
    571   1.1  mrg 
    572   1.1  mrg       /* See if some previous Operator was using the same insn_code.
    573   1.1  mrg 	 If so, reuse its table entry.  */
    574   1.1  mrg       for (size_t j = 0; j < i; j++)
    575   1.1  mrg 	{
    576   1.1  mrg 	  Operator *old = &ops[j];
    577   1.1  mrg 	  if (strcmp (old->m_pattern, op->m_pattern) == 0)
    578   1.1  mrg 	    {
    579   1.1  mrg 	      index = old->m_top_index;
    580   1.1  mrg 	      break;
    581   1.1  mrg 	    }
    582   1.1  mrg 	}
    583   1.1  mrg 
    584   1.1  mrg       if (index == -1)
    585   1.1  mrg 	{
    586   1.1  mrg 	  /* We need to make a new entry in the table.  */
    587   1.1  mrg 	  printf (",\n  %s", op->m_pattern);
    588   1.1  mrg 	  index = next_index++;
    589   1.1  mrg 	}
    590   1.1  mrg 
    591   1.1  mrg       op->m_top_index = index;
    592   1.1  mrg     }
    593   1.1  mrg 
    594   1.1  mrg   printf ("\n};\n\n");
    595   1.1  mrg }
    596   1.1  mrg 
    597   1.1  mrg 
    598   1.1  mrg /* These are constants we've seen in code, that we want to keep table
    599   1.1  mrg    entries for.  */
    600   1.1  mrg static int multiply_constants_used[] = {
    601   1.1  mrg   -11796480,
    602   1.1  mrg   -27439,
    603   1.1  mrg   -25148,
    604   1.1  mrg   -22820,
    605   1.1  mrg   -21709,
    606   1.1  mrg   -20995,
    607   1.1  mrg   -20284,
    608   1.1  mrg   -20239,
    609   1.1  mrg   -19595,
    610   1.1  mrg   -19447,
    611   1.1  mrg   -19183,
    612   1.1  mrg   -19165,
    613   1.1  mrg   -18730,
    614   1.1  mrg   -17828,
    615   1.1  mrg   -17799,
    616   1.1  mrg   -17237,
    617   1.1  mrg   -17036,
    618   1.1  mrg   -16549,
    619   1.1  mrg   -16423,
    620   1.1  mrg   -16294,
    621   1.1  mrg   -16244,
    622   1.1  mrg   -16069,
    623   1.1  mrg   -15137,
    624   1.1  mrg   -15083,
    625   1.1  mrg   -15038,
    626   1.1  mrg   -14924,
    627   1.1  mrg   -14905,
    628   1.1  mrg   -14752,
    629   1.1  mrg   -14731,
    630   1.1  mrg   -14529,
    631   1.1  mrg   -14273,
    632   1.1  mrg   -14090,
    633   1.1  mrg   -14084,
    634   1.1  mrg   -14043,
    635   1.1  mrg   -13850,
    636   1.1  mrg   -13802,
    637   1.1  mrg   -13631,
    638   1.1  mrg   -13455,
    639   1.1  mrg   -13275,
    640   1.1  mrg   -12879,
    641   1.1  mrg   -12700,
    642   1.1  mrg   -12534,
    643   1.1  mrg   -12399,
    644   1.1  mrg   -12131,
    645   1.1  mrg   -12112,
    646   1.1  mrg   -12054,
    647   1.1  mrg   -12019,
    648   1.1  mrg   -11759,
    649   1.1  mrg   -11585,
    650   1.1  mrg   -11467,
    651   1.1  mrg   -11395,
    652   1.1  mrg   -11295,
    653   1.1  mrg   -11248,
    654   1.1  mrg   -11148,
    655   1.1  mrg   -11116,
    656   1.1  mrg   -11086,
    657   1.1  mrg   -11059,
    658   1.1  mrg   -11018,
    659   1.1  mrg   -10811,
    660   1.1  mrg   -10538,
    661   1.1  mrg   -10258,
    662   1.1  mrg   -10217,
    663   1.1  mrg   -10033,
    664   1.1  mrg   -9766,
    665   1.1  mrg   -9754,
    666   1.1  mrg   -9534,
    667   1.1  mrg   -9527,
    668   1.1  mrg   -9467,
    669   1.1  mrg   -9262,
    670   1.1  mrg   -9232,
    671   1.1  mrg   -9222,
    672   1.1  mrg   -9198,
    673   1.1  mrg   -9191,
    674   1.1  mrg   -9113,
    675   1.1  mrg   -8825,
    676   1.1  mrg   -8756,
    677   1.1  mrg   -8697,
    678   1.1  mrg   -8693,
    679   1.1  mrg   -8565,
    680   1.1  mrg   -8342,
    681   1.1  mrg   -8208,
    682   1.1  mrg   -8200,
    683   1.1  mrg   -8170,
    684   1.1  mrg   -8102,
    685   1.1  mrg   -7770,
    686   1.1  mrg   -7678,
    687   1.1  mrg   -7562,
    688   1.1  mrg   -7376,
    689   1.1  mrg   -7373,
    690   1.1  mrg   -7221,
    691   1.1  mrg   -7121,
    692   1.1  mrg   -6835,
    693   1.1  mrg   -6810,
    694   1.1  mrg   -6626,
    695   1.1  mrg   -6581,
    696   1.1  mrg   -6461,
    697   1.1  mrg   -6278,
    698   1.1  mrg   -6263,
    699   1.1  mrg   -6163,
    700   1.1  mrg   -6029,
    701   1.1  mrg   -5816,
    702   1.1  mrg   -5540,
    703   1.1  mrg   -5461,
    704   1.1  mrg   -5384,
    705   1.1  mrg   -5329,
    706   1.1  mrg   -4985,
    707   1.1  mrg   -4926,
    708   1.1  mrg   -4815,
    709   1.1  mrg   -4788,
    710   1.1  mrg   -4758,
    711   1.1  mrg   -4433,
    712   1.1  mrg   -4229,
    713   1.1  mrg   -4209,
    714   1.1  mrg   -4176,
    715   1.1  mrg   -4104,
    716   1.1  mrg   -4095,
    717   1.1  mrg   -4078,
    718   1.1  mrg   -3941,
    719   1.1  mrg   -3818,
    720   1.1  mrg   -3600,
    721   1.1  mrg   -3474,
    722   1.1  mrg   -3314,
    723   1.1  mrg   -3264,
    724   1.1  mrg   -3196,
    725   1.1  mrg   -3072,
    726   1.1  mrg   -2912,
    727   1.1  mrg   -2836,
    728   1.1  mrg   -2773,
    729   1.1  mrg   -2269,
    730   1.1  mrg   -2184,
    731   1.1  mrg   -2100,
    732   1.1  mrg   -1730,
    733   1.1  mrg   -1512,
    734   1.1  mrg   -1500,
    735   1.1  mrg   -1396,
    736   1.1  mrg   -1344,
    737   1.1  mrg   -1312,
    738   1.1  mrg   -1297,
    739   1.1  mrg   -1059,
    740   1.1  mrg   -1058,
    741   1.1  mrg   1027,
    742   1.1  mrg   1049,
    743   1.1  mrg   1059,
    744   1.1  mrg   1100,
    745   1.1  mrg   1104,
    746   1.1  mrg   1108,
    747   1.1  mrg   1136,
    748   1.1  mrg   1200,
    749   1.1  mrg   1204,
    750   1.1  mrg   1242,
    751   1.1  mrg   1292,
    752   1.1  mrg   1304,
    753   1.1  mrg   1312,
    754   1.1  mrg   1320,
    755   1.1  mrg   1336,
    756   1.1  mrg   1344,
    757   1.1  mrg   1348,
    758   1.1  mrg   1360,
    759   1.1  mrg   1364,
    760   1.1  mrg   1395,
    761   1.1  mrg   1448,
    762   1.1  mrg   1460,
    763   1.1  mrg   1461,
    764   1.1  mrg   1472,
    765   1.1  mrg   1488,
    766   1.1  mrg   1500,
    767   1.1  mrg   1512,
    768   1.1  mrg   1568,
    769   1.1  mrg   1576,
    770   1.1  mrg   1649,
    771   1.1  mrg   1664,
    772   1.1  mrg   1684,
    773   1.1  mrg   1696,
    774   1.1  mrg   1744,
    775   1.1  mrg   1812,
    776   1.1  mrg   1822,
    777   1.1  mrg   1884,
    778   1.1  mrg   1963,
    779   1.1  mrg   1978,
    780   1.1  mrg   2000,
    781   1.1  mrg   2012,
    782   1.1  mrg   2014,
    783   1.1  mrg   2037,
    784   1.1  mrg   2039,
    785   1.1  mrg   2100,
    786   1.1  mrg   2139,
    787   1.1  mrg   2144,
    788   1.1  mrg   2184,
    789   1.1  mrg   2237,
    790   1.1  mrg   2260,
    791   1.1  mrg   2320,
    792   1.1  mrg   2408,
    793   1.1  mrg   2446,
    794   1.1  mrg   2447,
    795   1.1  mrg   2499,
    796   1.1  mrg   2531,
    797   1.1  mrg   2578,
    798   1.1  mrg   2592,
    799   1.1  mrg   2611,
    800   1.1  mrg   2633,
    801   1.1  mrg   2704,
    802   1.1  mrg   2730,
    803   1.1  mrg   2773,
    804   1.1  mrg   2880,
    805   1.1  mrg   2896,
    806   1.1  mrg   2998,
    807   1.1  mrg   3000,
    808   1.1  mrg   3001,
    809   1.1  mrg   3021,
    810   1.1  mrg   3079,
    811   1.1  mrg   3112,
    812   1.1  mrg   3150,
    813   1.1  mrg   3179,
    814   1.1  mrg   3192,
    815   1.1  mrg   3240,
    816   1.1  mrg   3264,
    817   1.1  mrg   3271,
    818   1.1  mrg   3283,
    819   1.1  mrg   3328,
    820   1.1  mrg   3363,
    821   1.1  mrg   3367,
    822   1.1  mrg   3453,
    823   1.1  mrg   3529,
    824   1.1  mrg   3570,
    825   1.1  mrg   3580,
    826   1.1  mrg   3600,
    827   1.1  mrg   3624,
    828   1.1  mrg   3707,
    829   1.1  mrg   3783,
    830   1.1  mrg   3826,
    831   1.1  mrg   3897,
    832   1.1  mrg   3941,
    833   1.1  mrg   3962,
    834   1.1  mrg   3989,
    835   1.1  mrg   4000,
    836   1.1  mrg   4025,
    837   1.1  mrg   4073,
    838   1.1  mrg   4093,
    839   1.1  mrg   4099,
    840   1.1  mrg   4108,
    841   1.1  mrg   4184,
    842   1.1  mrg   4209,
    843   1.1  mrg   4369,
    844   1.1  mrg   4376,
    845   1.1  mrg   4416,
    846   1.1  mrg   4433,
    847   1.1  mrg   4434,
    848   1.1  mrg   4482,
    849   1.1  mrg   4582,
    850   1.1  mrg   4712,
    851   1.1  mrg   4717,
    852   1.1  mrg   4813,
    853   1.1  mrg   4815,
    854   1.1  mrg   4864,
    855   1.1  mrg   5000,
    856   1.1  mrg   5027,
    857   1.1  mrg   5040,
    858   1.1  mrg   5091,
    859   1.1  mrg   5195,
    860   1.1  mrg   5243,
    861   1.1  mrg   5260,
    862   1.1  mrg   5285,
    863   1.1  mrg   5329,
    864   1.1  mrg   5331,
    865   1.1  mrg   5350,
    866   1.1  mrg   5361,
    867   1.1  mrg   5387,
    868   1.1  mrg   5461,
    869   1.1  mrg   5492,
    870   1.1  mrg   5529,
    871   1.1  mrg   5573,
    872   1.1  mrg   5793,
    873   1.1  mrg   5819,
    874   1.1  mrg   5915,
    875   1.1  mrg   5946,
    876   1.1  mrg   5992,
    877   1.1  mrg   6000,
    878   1.1  mrg   6164,
    879   1.1  mrg   6205,
    880   1.1  mrg   6262,
    881   1.1  mrg   6263,
    882   1.1  mrg   6269,
    883   1.1  mrg   6270,
    884   1.1  mrg   6387,
    885   1.1  mrg   6400,
    886   1.1  mrg   6406,
    887   1.1  mrg   6476,
    888   1.1  mrg   6541,
    889   1.1  mrg   6565,
    890   1.1  mrg   6568,
    891   1.1  mrg   6626,
    892   1.1  mrg   6656,
    893   1.1  mrg   6732,
    894   1.1  mrg   6810,
    895   1.1  mrg   6817,
    896   1.1  mrg   6859,
    897   1.1  mrg   7040,
    898   1.1  mrg   7053,
    899   1.1  mrg   7141,
    900   1.1  mrg   7169,
    901   1.1  mrg   7221,
    902   1.1  mrg   7223,
    903   1.1  mrg   7274,
    904   1.1  mrg   7282,
    905   1.1  mrg   7350,
    906   1.1  mrg   7369,
    907   1.1  mrg   7373,
    908   1.1  mrg   7442,
    909   1.1  mrg   7447,
    910   1.1  mrg   7471,
    911   1.1  mrg   7518,
    912   1.1  mrg   7542,
    913   1.1  mrg   7566,
    914   1.1  mrg   7587,
    915   1.1  mrg   7663,
    916   1.1  mrg   7678,
    917   1.1  mrg   7682,
    918   1.1  mrg   7748,
    919   1.1  mrg   7752,
    920   1.1  mrg   7791,
    921   1.1  mrg   8000,
    922   1.1  mrg   8026,
    923   1.1  mrg   8048,
    924   1.1  mrg   8170,
    925   1.1  mrg   8203,
    926   1.1  mrg   8204,
    927   1.1  mrg   8290,
    928   1.1  mrg   8368,
    929   1.1  mrg   8520,
    930   1.1  mrg   8640,
    931   1.1  mrg   8666,
    932   1.1  mrg   8672,
    933   1.1  mrg   8697,
    934   1.1  mrg   8716,
    935   1.1  mrg   8728,
    936   1.1  mrg   8756,
    937   1.1  mrg   8820,
    938   1.1  mrg   8875,
    939   1.1  mrg   8918,
    940   1.1  mrg   8956,
    941   1.1  mrg   9058,
    942   1.1  mrg   9154,
    943   1.1  mrg   9175,
    944   1.1  mrg   9191,
    945   1.1  mrg   9217,
    946   1.1  mrg   9262,
    947   1.1  mrg   9321,
    948   1.1  mrg   9373,
    949   1.1  mrg   9434,
    950   1.1  mrg   9465,
    951   1.1  mrg   9514,
    952   1.1  mrg   9534,
    953   1.1  mrg   9633,
    954   1.1  mrg   9746,
    955   1.1  mrg   9810,
    956   1.1  mrg   9850,
    957   1.1  mrg   9947,
    958   1.1  mrg   9973,
    959   1.1  mrg   10000,
    960   1.1  mrg   10009,
    961   1.1  mrg   10033,
    962   1.1  mrg   10055,
    963   1.1  mrg   10217,
    964   1.1  mrg   10248,
    965   1.1  mrg   10298,
    966   1.1  mrg   10310,
    967   1.1  mrg   10323,
    968   1.1  mrg   10368,
    969   1.1  mrg   10438,
    970   1.1  mrg   10456,
    971   1.1  mrg   10486,
    972   1.1  mrg   10538,
    973   1.1  mrg   10664,
    974   1.1  mrg   10695,
    975   1.1  mrg   10700,
    976   1.1  mrg   10703,
    977   1.1  mrg   10832,
    978   1.1  mrg   10887,
    979   1.1  mrg   10935,
    980   1.1  mrg   10958,
    981   1.1  mrg   11018,
    982   1.1  mrg   11059,
    983   1.1  mrg   11061,
    984   1.1  mrg   11086,
    985   1.1  mrg   11116,
    986   1.1  mrg   11148,
    987   1.1  mrg   11190,
    988   1.1  mrg   11249,
    989   1.1  mrg   11314,
    990   1.1  mrg   11332,
    991   1.1  mrg   11363,
    992   1.1  mrg   11409,
    993   1.1  mrg   11415,
    994   1.1  mrg   11443,
    995   1.1  mrg   11467,
    996   1.1  mrg   11512,
    997   1.1  mrg   11522,
    998   1.1  mrg   11529,
    999   1.1  mrg   11585,
   1000   1.1  mrg   11759,
   1001   1.1  mrg   11768,
   1002   1.1  mrg   11795,
   1003   1.1  mrg   11893,
   1004   1.1  mrg   11997,
   1005   1.1  mrg   12131,
   1006   1.1  mrg   12299,
   1007   1.1  mrg   12536,
   1008   1.1  mrg   12543,
   1009   1.1  mrg   12893,
   1010   1.1  mrg   12945,
   1011   1.1  mrg   12998,
   1012   1.1  mrg   13109,
   1013   1.1  mrg   13213,
   1014   1.1  mrg   13685,
   1015   1.1  mrg   13930,
   1016   1.1  mrg   14023,
   1017   1.1  mrg   14024,
   1018   1.1  mrg   14271,
   1019   1.1  mrg   14564,
   1020   1.1  mrg   14647,
   1021   1.1  mrg   15326,
   1022   1.1  mrg   15850,
   1023   1.1  mrg   15855,
   1024   1.1  mrg   15929,
   1025   1.1  mrg   16000,
   1026   1.1  mrg   16154,
   1027   1.1  mrg   16496,
   1028   1.1  mrg   16807,
   1029   1.1  mrg   16819,
   1030   1.1  mrg   16984,
   1031   1.1  mrg   17203,
   1032   1.1  mrg   17223,
   1033   1.1  mrg   17333,
   1034   1.1  mrg   17760,
   1035   1.1  mrg   17799,
   1036   1.1  mrg   17837,
   1037   1.1  mrg   18029,
   1038   1.1  mrg   18068,
   1039   1.1  mrg   18336,
   1040   1.1  mrg   18515,
   1041   1.1  mrg   19595,
   1042   1.1  mrg   20017,
   1043   1.1  mrg   20131,
   1044   1.1  mrg   20862,
   1045   1.1  mrg   20995,
   1046   1.1  mrg   21709,
   1047   1.1  mrg   22554,
   1048   1.1  mrg   25000,
   1049   1.1  mrg   25172,
   1050   1.1  mrg   25600,
   1051   1.1  mrg   25733,
   1052   1.1  mrg   27439,
   1053   1.1  mrg   38470,
   1054   1.1  mrg   46802,
   1055   1.1  mrg   50000,
   1056   1.1  mrg   11796480,
   1057   1.1  mrg   16843009,
   1058   1.1  mrg   23592960,
   1059   1.1  mrg };
   1060   1.1  mrg 
   1061   1.1  mrg 
   1062   1.1  mrg const int num_mult_constants_used =
   1063   1.1  mrg   (int)(sizeof multiply_constants_used
   1064   1.1  mrg 	/ sizeof multiply_constants_used[0]);
   1065   1.1  mrg 
   1066   1.1  mrg 
   1067   1.1  mrg #define XSIZE (sizeof multiply_constants_used / \
   1068   1.1  mrg 	       sizeof multiply_constants_used[0] + 32) / 32
   1069   1.1  mrg unsigned multiply_constants_avail[XSIZE];
   1070   1.1  mrg #undef XSIZE
   1071   1.1  mrg 
   1072   1.1  mrg 
   1073   1.1  mrg /* bsearch helper function.  */
   1074   1.1  mrg static int
   1075   1.1  mrg compare_constants (const void *key, const void *t)
   1076   1.1  mrg {
   1077   1.1  mrg   return (*(int*)key) - *((int*)t);
   1078   1.1  mrg }
   1079   1.1  mrg 
   1080   1.1  mrg 
   1081   1.1  mrg static int *
   1082   1.1  mrg find_mult_constants_used (int multiplier)
   1083   1.1  mrg {
   1084   1.1  mrg   return (int *) bsearch (&multiplier, multiply_constants_used,
   1085   1.1  mrg 			  num_mult_constants_used,
   1086   1.1  mrg 			  sizeof multiply_constants_used[0],
   1087   1.1  mrg 			  compare_constants);
   1088   1.1  mrg }
   1089   1.1  mrg 
   1090   1.1  mrg 
   1091   1.1  mrg int num_ops (ExpressionTree *s)
   1092   1.1  mrg {
   1093   1.1  mrg   int n = 0;
   1094   1.1  mrg   for (int i = 0; i < s->m_num_vals; i++)
   1095   1.1  mrg     {
   1096   1.1  mrg       Expr *e = &s->m_exprs[i];
   1097   1.1  mrg       if (e->m_op != NULL)
   1098   1.1  mrg 	n++;
   1099   1.1  mrg     }
   1100   1.1  mrg   return n;
   1101   1.1  mrg }
   1102   1.1  mrg 
   1103   1.1  mrg 
   1104   1.1  mrg #ifdef TILEPRO
   1105   1.1  mrg bool
   1106   1.1  mrg tilepro_emit (int multiplier, int num_ops)
   1107   1.1  mrg {
   1108   1.1  mrg   int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier;
   1109   1.1  mrg 
   1110   1.1  mrg   /* Keep constants in range [-1024, 1024].  */
   1111   1.1  mrg   if (abs_multiplier <= 1024)
   1112   1.1  mrg     return true;
   1113   1.1  mrg 
   1114   1.1  mrg   /* Keep constants near powers of two.  */
   1115   1.1  mrg   int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier));
   1116   1.1  mrg   int next_pow2 = prev_pow2 << 1;
   1117   1.1  mrg 
   1118   1.1  mrg   if ((abs_multiplier - prev_pow2 <= 10)
   1119   1.1  mrg       || (next_pow2 - abs_multiplier <= 10))
   1120   1.1  mrg     return true;
   1121   1.1  mrg 
   1122   1.1  mrg   /* Keep constants near powers of ten.  */
   1123   1.1  mrg   {
   1124   1.1  mrg     long long j = 1;
   1125   1.1  mrg     long long prev_pow10;
   1126   1.1  mrg     long long next_pow10;
   1127   1.1  mrg 
   1128   1.1  mrg     while (((j * 10) < abs_multiplier)
   1129   1.1  mrg 	   && (j < (j * 10)))
   1130   1.1  mrg       j = j * 10;
   1131   1.1  mrg 
   1132   1.1  mrg     prev_pow10 = j;
   1133   1.1  mrg     next_pow10 = j * 10;
   1134   1.1  mrg 
   1135   1.1  mrg     if ((abs_multiplier - prev_pow10 <= 10)
   1136   1.1  mrg 	|| (next_pow10 - abs_multiplier <= 10))
   1137   1.1  mrg       return true;
   1138   1.1  mrg   }
   1139   1.1  mrg 
   1140   1.1  mrg   /* Keep short sequences that have two or fewer ops.  */
   1141   1.1  mrg   if (num_ops <= 2)
   1142   1.1  mrg     return true;
   1143   1.1  mrg 
   1144   1.1  mrg   /* Keep constants that are mostly 0's or mostly 1's.  */
   1145   1.1  mrg   if (__builtin_popcount (multiplier) <= 2 ||
   1146   1.1  mrg       __builtin_popcount (multiplier) >= 30)
   1147   1.1  mrg     return true;
   1148   1.1  mrg 
   1149   1.1  mrg   /* Keep constants seen in actual code.  */
   1150   1.1  mrg   if ((find_mult_constants_used (multiplier)))
   1151   1.1  mrg     return true;
   1152   1.1  mrg 
   1153   1.1  mrg   return false;
   1154   1.1  mrg }
   1155   1.1  mrg #else
   1156   1.1  mrg bool
   1157   1.1  mrg tilegx_emit (long long multiplier, int num_ops)
   1158   1.1  mrg {
   1159   1.1  mrg   long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier;
   1160   1.1  mrg 
   1161   1.1  mrg   /* Keep constants in range [-1024, 1024].  */
   1162   1.1  mrg   if (abs_multiplier <= 1024)
   1163   1.1  mrg     return true;
   1164   1.1  mrg 
   1165   1.1  mrg   /* Otherwise exclude sequences with four ops.  */
   1166   1.1  mrg   if (num_ops > 3)
   1167   1.1  mrg     return false;
   1168   1.1  mrg 
   1169   1.1  mrg   /* Keep constants near powers of two.  */
   1170   1.1  mrg   {
   1171   1.1  mrg     unsigned long long prev_pow2 =
   1172   1.1  mrg       1LL << (63 - __builtin_clzll (abs_multiplier));
   1173   1.1  mrg     unsigned long long next_pow2 = prev_pow2 << 1;
   1174   1.1  mrg 
   1175   1.1  mrg     /* handle overflow case. */
   1176   1.1  mrg     if (next_pow2 == 0)
   1177   1.1  mrg       {
   1178   1.1  mrg 	if (prev_pow2 - abs_multiplier <= 10)
   1179   1.1  mrg 	  return true;
   1180   1.1  mrg       }
   1181   1.1  mrg     else if ((abs_multiplier - prev_pow2 <= 10)
   1182   1.1  mrg 	     || (next_pow2 - abs_multiplier <= 10))
   1183   1.1  mrg       return true;
   1184   1.1  mrg   }
   1185   1.1  mrg 
   1186   1.1  mrg   /* Keep constants near powers of ten.  */
   1187   1.1  mrg   {
   1188   1.1  mrg     long long j = 1;
   1189   1.1  mrg     long long prev_pow10;
   1190   1.1  mrg     long long next_pow10;
   1191   1.1  mrg 
   1192   1.1  mrg     while (((j * 10) < abs_multiplier)
   1193   1.9  mrg 	   && (j < (j * 10)))
   1194   1.1  mrg       j = j * 10;
   1195   1.1  mrg 
   1196   1.1  mrg     prev_pow10 = j;
   1197   1.9  mrg     next_pow10 = j * 10;
   1198   1.1  mrg 
   1199   1.1  mrg     if ((abs_multiplier - prev_pow10 <= 100)
   1200   1.1  mrg 	|| (next_pow10
   1201   1.1  mrg 	    && (next_pow10 - abs_multiplier <= 100)))
   1202   1.1  mrg       return true;
   1203   1.1  mrg   }
   1204   1.1  mrg 
   1205   1.1  mrg   if (num_ops <= 2)
   1206   1.1  mrg     return true;
   1207   1.1  mrg 
   1208   1.1  mrg   /* Keep constants that are mostly 0's or mostly 1's.  */
   1209   1.1  mrg   if (__builtin_popcountll (multiplier) <= 2 ||
   1210   1.1  mrg       __builtin_popcountll (multiplier) >= 62)
   1211   1.1  mrg     return true;
   1212   1.1  mrg 
   1213   1.1  mrg   /* Keep constants seen in actual code.  */
   1214   1.1  mrg   if (find_mult_constants_used (multiplier))
   1215   1.1  mrg     return true;
   1216   1.1  mrg 
   1217   1.1  mrg   return false;
   1218   1.1  mrg }
   1219   1.1  mrg #endif
   1220   1.1  mrg 
   1221   1.1  mrg 
   1222   1.1  mrg int
   1223   1.1  mrg main ()
   1224   1.1  mrg {
   1225   1.1  mrg   ExpressionTreeMap best_solution;
   1226   1.1  mrg   ExpressionTree s;
   1227   1.1  mrg 
   1228   1.1  mrg #ifdef TILEPRO
   1229   1.1  mrg   printf ("/* Constant multiply table for TILEPro.\n");
   1230   1.1  mrg #else
   1231   1.1  mrg   printf ("/* Constant multiply table for TILE-Gx.\n");
   1232   1.1  mrg #endif
   1233  1.10  mrg   printf ("   Copyright (C) 2011-2022 Free Software Foundation, Inc.\n");
   1234   1.1  mrg   printf ("   Contributed by Walter Lee (walt (at) tilera.com)\n");
   1235   1.1  mrg   printf ("\n");
   1236   1.1  mrg   printf ("   This file is part of GCC.\n");
   1237   1.1  mrg   printf ("\n");
   1238   1.1  mrg   printf ("   GCC is free software; you can redistribute it and/or modify it\n");
   1239   1.1  mrg   printf ("   under the terms of the GNU General Public License as published\n");
   1240   1.1  mrg   printf ("   by the Free Software Foundation; either version 3, or (at your\n");
   1241   1.1  mrg   printf ("   option) any later version.\n");
   1242   1.1  mrg   printf ("\n");
   1243   1.1  mrg   printf ("   GCC is distributed in the hope that it will be useful, but WITHOUT\n");
   1244   1.1  mrg   printf ("   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n");
   1245   1.1  mrg   printf ("   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public\n");
   1246   1.1  mrg   printf ("   License for more details.\n");
   1247   1.1  mrg   printf ("\n");
   1248   1.1  mrg   printf ("   You should have received a copy of the GNU General Public License\n");
   1249   1.1  mrg   printf ("   along with GCC; see the file COPYING3.  If not see\n");
   1250   1.1  mrg   printf ("   <http://www.gnu.org/licenses/>.  */\n");
   1251   1.1  mrg   printf ("\n");
   1252   1.3  mrg   printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n");
   1253   1.3  mrg   printf ("   Make any required changes there.  */\n");
   1254   1.3  mrg   printf ("\n");
   1255  1.10  mrg   printf ("#define IN_TARGET_CODE 1\n");
   1256  1.10  mrg   printf ("\n");
   1257   1.1  mrg   printf ("#include \"config.h\"\n");
   1258   1.1  mrg   printf ("#include \"system.h\"\n");
   1259   1.1  mrg   printf ("#include \"coretypes.h\"\n");
   1260   1.4  mrg   printf ("#include \"backend.h\"\n");
   1261   1.3  mrg   printf ("#include \"rtl.h\"\n");
   1262   1.3  mrg   printf ("#include \"expmed.h\"\n");
   1263   1.1  mrg   printf ("#include \"%s-multiply.h\"\n\n", ARCH);
   1264   1.1  mrg   create_insn_code_compression_table ();
   1265   1.1  mrg 
   1266   1.1  mrg   /* Try all combinations of operators and see what constants we
   1267   1.1  mrg      produce.  For each possible constant, record the most efficient
   1268   1.1  mrg      way to generate it.  */
   1269   1.1  mrg   find_sequences (s, best_solution);
   1270   1.1  mrg 
   1271   1.1  mrg   printf ("const struct %s_multiply_insn_seq "
   1272   1.1  mrg 	  "%s_multiply_insn_seq_table[] = {\n",
   1273   1.1  mrg 	  ARCH, ARCH);
   1274   1.1  mrg 
   1275   1.1  mrg   const char *comma_separator = "";
   1276   1.1  mrg 
   1277   1.1  mrg   ExpressionTreeMap::iterator i (best_solution.begin ());
   1278   1.1  mrg   for (; i != best_solution.end (); ++i)
   1279   1.1  mrg     {
   1280   1.1  mrg       ExpressionTree *s = (*i).second;
   1281   1.1  mrg       const MUL_TYPE n = (*i).first;
   1282   1.1  mrg 
   1283   1.1  mrg       if (n == 0 || n == 1)
   1284   1.1  mrg 	{
   1285   1.1  mrg 	  /* Both of these require zero operations, so these entries
   1286   1.1  mrg 	     are bogus and should never be used.  */
   1287   1.1  mrg 	  continue;
   1288   1.1  mrg 	}
   1289   1.1  mrg 
   1290   1.1  mrg       /* Prune the list of entries to keep the table to a reasonable
   1291   1.1  mrg 	 size.  */
   1292   1.1  mrg #ifdef TILEPRO
   1293   1.1  mrg       if (!tilepro_emit (n, num_ops (s)))
   1294   1.1  mrg 	continue;
   1295   1.1  mrg #else
   1296   1.1  mrg       if (!tilegx_emit (n, num_ops (s)))
   1297   1.1  mrg 	continue;
   1298   1.1  mrg #endif
   1299   1.1  mrg 
   1300   1.1  mrg       printf ("%s", comma_separator);
   1301   1.1  mrg 
   1302   1.1  mrg #ifdef TILEPRO
   1303   1.1  mrg       const MUL_TYPE int_min = INT32_MIN;
   1304   1.1  mrg #else
   1305   1.1  mrg       const MUL_TYPE int_min = INT64_MIN;
   1306   1.1  mrg #endif
   1307   1.1  mrg       if (n == int_min)
   1308   1.1  mrg 	{
   1309   1.1  mrg 	  /* Handle C's woeful lack of unary negation. Without this,
   1310   1.1  mrg 	     printing out INT_MIN in decimal will yield an unsigned
   1311   1.1  mrg 	     int which could generate a compiler warning.  */
   1312   1.1  mrg #ifdef TILEPRO
   1313   1.1  mrg 	  printf ("  {%d - 1 /* 0x%x */ ,\n   {", n + 1,
   1314   1.1  mrg 		  (unsigned) n);
   1315   1.1  mrg #else
   1316   1.1  mrg 	  printf ("  {%lldll - 1 /* 0x%llx */ ,\n   {", n + 1,
   1317   1.1  mrg 		  (unsigned MUL_TYPE) n);
   1318   1.1  mrg #endif
   1319   1.1  mrg 	}
   1320   1.1  mrg       else
   1321   1.1  mrg 	{
   1322   1.1  mrg #ifdef TILEPRO
   1323   1.1  mrg 	  printf ("  {%d /* 0x%x */ ,\n   {", n, (unsigned) n);
   1324   1.1  mrg #else
   1325   1.1  mrg 	  printf ("  {%lldll /* 0x%llx */ ,\n   {", n, (unsigned MUL_TYPE) n);
   1326   1.1  mrg #endif
   1327   1.1  mrg 	}
   1328   1.1  mrg 
   1329   1.1  mrg       bool first = true;
   1330   1.1  mrg       for (int j = 0; j < s->m_num_vals; j++)
   1331   1.1  mrg 	{
   1332   1.1  mrg 	  Expr *e = &s->m_exprs[j];
   1333   1.1  mrg 
   1334   1.1  mrg 	  const Operator *op = e->m_op;
   1335   1.1  mrg 	  if (op == NULL)
   1336   1.1  mrg 	    continue;
   1337   1.1  mrg 
   1338   1.1  mrg 	  char buf[1024];
   1339   1.1  mrg 	  snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s",
   1340   1.1  mrg 		    first ? "" : "    ",
   1341   1.1  mrg 		    op->m_top_index,
   1342   1.1  mrg 		    e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ",");
   1343   1.1  mrg 
   1344   1.1  mrg 	  char opnd0[10];
   1345   1.1  mrg 	  if (e->m_lhs)
   1346   1.1  mrg 	    snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs);
   1347   1.1  mrg 	  else
   1348   1.1  mrg 	    snprintf (opnd0, sizeof opnd0, "zero");
   1349   1.1  mrg 
   1350   1.1  mrg 	  printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n",
   1351   1.1  mrg 		  buf, op->m_name, j, opnd0,
   1352   1.1  mrg 		  op->is_unary () ? "" : "r", e->m_rhs);
   1353   1.1  mrg 
   1354   1.1  mrg 	  first = false;
   1355   1.1  mrg 	}
   1356   1.1  mrg       printf ("   }");
   1357   1.1  mrg       comma_separator = ",\n";
   1358   1.1  mrg     }
   1359   1.1  mrg 
   1360   1.1  mrg   printf ("\n};\n\n");
   1361   1.1  mrg   printf ("const int %s_multiply_insn_seq_table_size =\n"
   1362   1.1  mrg 	  "  (int) (sizeof %s_multiply_insn_seq_table\n"
   1363   1.1  mrg 	  "         / sizeof %s_multiply_insn_seq_table[0]);\n",
   1364   1.1  mrg 	  ARCH, ARCH, ARCH);
   1365   1.1  mrg 
   1366   1.1  mrg   return EXIT_SUCCESS;
   1367   1.1  mrg }
   1368