1 1.1 mrg /* Multiply table generator for tile. 2 1.10 mrg Copyright (C) 2011-2022 Free Software Foundation, Inc. 3 1.1 mrg Contributed by Walter Lee (walt (at) tilera.com) 4 1.1 mrg 5 1.1 mrg This file is part of GCC. 6 1.1 mrg 7 1.1 mrg GCC is free software; you can redistribute it and/or modify it 8 1.1 mrg under the terms of the GNU General Public License as published 9 1.1 mrg by the Free Software Foundation; either version 3, or (at your 10 1.1 mrg option) any later version. 11 1.1 mrg 12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT 13 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 1.1 mrg License for more details. 16 1.1 mrg 17 1.1 mrg You should have received a copy of the GNU General Public License 18 1.1 mrg along with GCC; see the file COPYING3. If not see 19 1.1 mrg <http://www.gnu.org/licenses/>. */ 20 1.1 mrg 21 1.1 mrg /* This program creates a table used to compile multiply by constant 22 1.1 mrg efficiently. 23 1.1 mrg 24 1.1 mrg This program should be compiled by a c++ compiler. If it's 25 1.4 mrg compiled with -DTILEPRO, it generates the multiply table for 26 1.1 mrg TILEPro; otherwise it generates the multiply table for TILE-Gx. 27 1.1 mrg Running the program produces the table in stdout. 28 1.1 mrg 29 1.1 mrg The program works by generating every possible combination of up to 30 1.1 mrg MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left 31 1.1 mrg shift) and computing the multiplier computed by those instructions. 32 1.1 mrg For example, 33 1.1 mrg 34 1.1 mrg s2a r2,r1,r1 35 1.1 mrg s2a r3,r2,r2 36 1.1 mrg 37 1.1 mrg multiplies r1 by 25. 38 1.1 mrg 39 1.1 mrg There are usually multiple instruction sequences to multiply by a 40 1.1 mrg given constant. This program keeps only the cheapest one. 41 1.1 mrg "Cheapest" is defined first by the minimum theoretical schedule 42 1.1 mrg length, and if those are equal then by the number of instructions, 43 1.1 mrg and if those are equal then by which instructions we "prefer" 44 1.1 mrg (e.g. because we think one might take infinitesimally less power 45 1.1 mrg than another, or simply because we arbitrarily pick one to be more 46 1.1 mrg canonical). 47 1.1 mrg 48 1.1 mrg Once this program has determined the best instruction sequence for 49 1.1 mrg each multiplier, it emits them in a table sorted by the multiplier 50 1.1 mrg so the user can binary-search it to look for a match. The table is 51 1.1 mrg pruned based on various criteria to keep its sizes reasonable. */ 52 1.1 mrg 53 1.1 mrg #include <stdio.h> 54 1.1 mrg #include <stdlib.h> 55 1.1 mrg #include <assert.h> 56 1.1 mrg #include <string.h> 57 1.1 mrg #define __STDC_LIMIT_MACROS 58 1.1 mrg #include <stdint.h> 59 1.1 mrg 60 1.1 mrg #include <map> 61 1.1 mrg 62 1.1 mrg #ifdef TILEPRO 63 1.1 mrg 64 1.1 mrg /* The string representing the architecture. */ 65 1.1 mrg #define ARCH "tilepro" 66 1.1 mrg 67 1.1 mrg /* The type for the multiplication. */ 68 1.1 mrg typedef int MUL_TYPE; 69 1.1 mrg 70 1.1 mrg #else 71 1.1 mrg 72 1.1 mrg /* The string representing the architecture. */ 73 1.1 mrg #define ARCH "tilegx" 74 1.1 mrg 75 1.1 mrg /* The type for the multiplication. */ 76 1.1 mrg typedef long long MUL_TYPE; 77 1.1 mrg 78 1.1 mrg #endif 79 1.1 mrg 80 1.1 mrg /* Longest instruction sequence this will produce. With the current 81 1.1 mrg stupid algorithm runtime grows very quickly with this number. */ 82 1.1 mrg #define MAX_INSTRUCTIONS 4 83 1.1 mrg 84 1.1 mrg /* Maximum number of subexpressions in the expression DAG being 85 1.1 mrg generated. This is the same as the number of instructions, except 86 1.1 mrg that zero and the original register we'd like to multiply by a 87 1.1 mrg constant are also thrown into the mix. */ 88 1.1 mrg #define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS) 89 1.1 mrg 90 1.1 mrg #define MIN(x, y) ((x) <= (y) ? (x) : (y)) 91 1.1 mrg #define MAX(x, y) ((x) >= (y) ? (x) : (y)) 92 1.1 mrg 93 1.1 mrg /* For this program a unary op is one which has only one nonconstant 94 1.1 mrg operand. So shift left by 5 is considered unary. */ 95 1.1 mrg typedef MUL_TYPE (*unary_op_func) (MUL_TYPE); 96 1.1 mrg typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE); 97 1.1 mrg 98 1.1 mrg /* This describes an operation like 'add two registers' or 'left-shift 99 1.1 mrg by 7'. 100 1.1 mrg 101 1.1 mrg We call something a 'unary' operator if it only takes in one 102 1.1 mrg register as input, even though it might have an implicit second 103 1.1 mrg constant operand. Currently this is used for left-shift by 104 1.1 mrg constant. */ 105 1.1 mrg class Operator 106 1.1 mrg { 107 1.1 mrg public: 108 1.1 mrg /* Construct for a binary operator. */ 109 1.1 mrg Operator (const char *pattern, const char *name, binary_op_func func, 110 1.1 mrg int cost) 111 1.1 mrg : m_pattern (pattern), m_name (name), m_top_index (-1), 112 1.1 mrg m_unary_func (0), m_binary_func (func), m_cost (cost), 113 1.1 mrg m_rhs_if_unary (0) 114 1.1 mrg { 115 1.1 mrg } 116 1.1 mrg 117 1.1 mrg /* Construct for a unary operator. */ 118 1.1 mrg Operator (const char *pattern, const char *name, unary_op_func func, 119 1.1 mrg int rhs_if_unary, int cost) 120 1.1 mrg : m_pattern (pattern), m_name (name), m_top_index (-1), 121 1.1 mrg m_unary_func (func), m_binary_func (0), m_cost (cost), 122 1.1 mrg m_rhs_if_unary (rhs_if_unary) 123 1.1 mrg { 124 1.1 mrg } 125 1.1 mrg 126 1.1 mrg bool is_unary () const 127 1.1 mrg { 128 1.1 mrg return m_binary_func == NULL; 129 1.1 mrg } 130 1.1 mrg 131 1.1 mrg /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3. */ 132 1.1 mrg const char *m_pattern; 133 1.1 mrg 134 1.1 mrg /* Name of the opcode for this operation, e.g. add. */ 135 1.1 mrg const char *m_name; 136 1.1 mrg 137 1.1 mrg /* We don't have enough bits in our output representation to store 138 1.1 mrg the original insn_code value, so we store a compressed form 139 1.1 mrg instead. These values are decoded back into insn_code via the 140 1.1 mrg machine-generated multiply_insn_seq_decode_opcode lookup 141 1.1 mrg table. */ 142 1.1 mrg int m_top_index; 143 1.1 mrg 144 1.1 mrg /* Unary operator to apply, or NULL if this is a binary operator. */ 145 1.1 mrg unary_op_func m_unary_func; 146 1.1 mrg 147 1.1 mrg /* Binary operator to apply, or NULL if this is a unary operator. */ 148 1.1 mrg binary_op_func m_binary_func; 149 1.1 mrg 150 1.1 mrg /* Function of how expensive we consider this operator. Higher is 151 1.1 mrg worse. */ 152 1.1 mrg int m_cost; 153 1.1 mrg 154 1.1 mrg /* the RHS value to write into the C file if unary; used for shift 155 1.1 mrg count. */ 156 1.1 mrg int m_rhs_if_unary; 157 1.1 mrg }; 158 1.1 mrg 159 1.1 mrg 160 1.1 mrg /* An entry in an expression DAG. */ 161 1.1 mrg class Expr 162 1.1 mrg { 163 1.1 mrg public: 164 1.1 mrg Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0), 165 1.1 mrg m_critical_path_length (0) 166 1.1 mrg { 167 1.1 mrg } 168 1.1 mrg 169 1.1 mrg /* The operator being applied to the operands. */ 170 1.1 mrg const Operator *m_op; 171 1.1 mrg 172 1.1 mrg /* The index of the left-hand operand in the array of subexpressions 173 1.1 mrg already computed. */ 174 1.1 mrg int m_lhs; 175 1.1 mrg 176 1.1 mrg /* For binary ops ,this is the index of the left-hand operand in the 177 1.1 mrg array of subexpressions already computed. For unary ops, it is 178 1.1 mrg specific to the op (e.g. it might hold a constant shift 179 1.1 mrg count). */ 180 1.1 mrg int m_rhs; 181 1.1 mrg 182 1.1 mrg /* The multiplier produced by this expression tree. For example, for 183 1.1 mrg the tree ((x << 5) + x), the value would be 33. */ 184 1.1 mrg MUL_TYPE m_produced_val; 185 1.1 mrg 186 1.1 mrg /* How far is this expression from the root, i.e. how many cycles 187 1.1 mrg minimum will it take to compute this? */ 188 1.1 mrg int m_critical_path_length; 189 1.1 mrg }; 190 1.1 mrg 191 1.1 mrg 192 1.1 mrg /* Make function pointers for the various linear operators we can 193 1.1 mrg apply to compute a multiplicative value. */ 194 1.1 mrg 195 1.1 mrg static MUL_TYPE 196 1.1 mrg add (MUL_TYPE n1, MUL_TYPE n2) 197 1.1 mrg { 198 1.1 mrg return n1 + n2; 199 1.1 mrg } 200 1.1 mrg 201 1.1 mrg static MUL_TYPE 202 1.1 mrg sub (MUL_TYPE n1, MUL_TYPE n2) 203 1.1 mrg { 204 1.1 mrg return n1 - n2; 205 1.1 mrg } 206 1.1 mrg 207 1.1 mrg static MUL_TYPE 208 1.1 mrg s1a (MUL_TYPE n1, MUL_TYPE n2) 209 1.1 mrg { 210 1.1 mrg return n1 * 2 + n2; 211 1.1 mrg } 212 1.1 mrg 213 1.1 mrg static MUL_TYPE 214 1.1 mrg s2a (MUL_TYPE n1, MUL_TYPE n2) 215 1.1 mrg { 216 1.1 mrg return n1 * 4 + n2; 217 1.1 mrg } 218 1.1 mrg 219 1.1 mrg static MUL_TYPE 220 1.1 mrg s3a (MUL_TYPE n1, MUL_TYPE n2) 221 1.1 mrg { 222 1.1 mrg return n1 * 8 + n2; 223 1.1 mrg } 224 1.1 mrg 225 1.1 mrg #define SHIFT(count) \ 226 1.1 mrg static MUL_TYPE \ 227 1.1 mrg shift##count(MUL_TYPE n) \ 228 1.1 mrg { \ 229 1.1 mrg return n << (count); \ 230 1.1 mrg } 231 1.1 mrg 232 1.1 mrg SHIFT (1); 233 1.1 mrg SHIFT (2); 234 1.1 mrg SHIFT (3); 235 1.1 mrg SHIFT (4); 236 1.1 mrg SHIFT (5); 237 1.1 mrg SHIFT (6); 238 1.1 mrg SHIFT (7); 239 1.1 mrg SHIFT (8); 240 1.1 mrg SHIFT (9); 241 1.1 mrg SHIFT (10); 242 1.1 mrg SHIFT (11); 243 1.1 mrg SHIFT (12); 244 1.1 mrg SHIFT (13); 245 1.1 mrg SHIFT (14); 246 1.1 mrg SHIFT (15); 247 1.1 mrg SHIFT (16); 248 1.1 mrg SHIFT (17); 249 1.1 mrg SHIFT (18); 250 1.1 mrg SHIFT (19); 251 1.1 mrg SHIFT (20); 252 1.1 mrg SHIFT (21); 253 1.1 mrg SHIFT (22); 254 1.1 mrg SHIFT (23); 255 1.1 mrg SHIFT (24); 256 1.1 mrg SHIFT (25); 257 1.1 mrg SHIFT (26); 258 1.1 mrg SHIFT (27); 259 1.1 mrg SHIFT (28); 260 1.1 mrg SHIFT (29); 261 1.1 mrg SHIFT (30); 262 1.1 mrg SHIFT (31); 263 1.1 mrg #ifndef TILEPRO 264 1.1 mrg SHIFT (32); 265 1.1 mrg SHIFT (33); 266 1.1 mrg SHIFT (34); 267 1.1 mrg SHIFT (35); 268 1.1 mrg SHIFT (36); 269 1.1 mrg SHIFT (37); 270 1.1 mrg SHIFT (38); 271 1.1 mrg SHIFT (39); 272 1.1 mrg SHIFT (40); 273 1.1 mrg SHIFT (41); 274 1.1 mrg SHIFT (42); 275 1.1 mrg SHIFT (43); 276 1.1 mrg SHIFT (44); 277 1.1 mrg SHIFT (45); 278 1.1 mrg SHIFT (46); 279 1.1 mrg SHIFT (47); 280 1.1 mrg SHIFT (48); 281 1.1 mrg SHIFT (49); 282 1.1 mrg SHIFT (50); 283 1.1 mrg SHIFT (51); 284 1.1 mrg SHIFT (52); 285 1.1 mrg SHIFT (53); 286 1.1 mrg SHIFT (54); 287 1.1 mrg SHIFT (55); 288 1.1 mrg SHIFT (56); 289 1.1 mrg SHIFT (57); 290 1.1 mrg SHIFT (58); 291 1.1 mrg SHIFT (59); 292 1.1 mrg SHIFT (60); 293 1.1 mrg SHIFT (61); 294 1.1 mrg SHIFT (62); 295 1.1 mrg SHIFT (63); 296 1.1 mrg #endif 297 1.1 mrg 298 1.1 mrg #ifdef TILEPRO 299 1.1 mrg static Operator ops[] = { 300 1.1 mrg Operator ("CODE_FOR_addsi3", "add", add, 1040), 301 1.1 mrg Operator ("CODE_FOR_subsi3", "sub", sub, 1041), 302 1.1 mrg Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042), 303 1.1 mrg Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043), 304 1.1 mrg Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044), 305 1.1 mrg /* Note: shl by 1 is not necessary, since adding a value to itself 306 1.1 mrg produces the same result. But the architecture team thinks 307 1.1 mrg left-shift may use slightly less power, so we might as well 308 1.1 mrg prefer it. */ 309 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001), 310 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002), 311 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003), 312 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004), 313 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005), 314 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006), 315 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007), 316 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008), 317 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009), 318 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010), 319 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011), 320 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012), 321 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013), 322 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014), 323 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015), 324 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016), 325 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017), 326 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018), 327 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019), 328 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020), 329 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021), 330 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022), 331 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023), 332 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024), 333 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025), 334 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026), 335 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027), 336 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028), 337 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029), 338 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030), 339 1.1 mrg Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031) 340 1.1 mrg }; 341 1.1 mrg #else 342 1.1 mrg static Operator ops[] = { 343 1.1 mrg Operator ("CODE_FOR_adddi3", "add", add, 1070), 344 1.1 mrg Operator ("CODE_FOR_subdi3", "sub", sub, 1071), 345 1.1 mrg Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072), 346 1.1 mrg Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073), 347 1.1 mrg Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074), 348 1.1 mrg // Note: shl by 1 is not necessary, since adding a value to itself 349 1.1 mrg // produces the same result. But the architecture team thinks left-shift 350 1.1 mrg // may use slightly less power, so we might as well prefer it. 351 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001), 352 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002), 353 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003), 354 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004), 355 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005), 356 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006), 357 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007), 358 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008), 359 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009), 360 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010), 361 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011), 362 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012), 363 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013), 364 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014), 365 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015), 366 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016), 367 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017), 368 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018), 369 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019), 370 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020), 371 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021), 372 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022), 373 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023), 374 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024), 375 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025), 376 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026), 377 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027), 378 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028), 379 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029), 380 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030), 381 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031), 382 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032), 383 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033), 384 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034), 385 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035), 386 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036), 387 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037), 388 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038), 389 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039), 390 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040), 391 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041), 392 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042), 393 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043), 394 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044), 395 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045), 396 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046), 397 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047), 398 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048), 399 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049), 400 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050), 401 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051), 402 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052), 403 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053), 404 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054), 405 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055), 406 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056), 407 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057), 408 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058), 409 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059), 410 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060), 411 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061), 412 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062), 413 1.1 mrg Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063) 414 1.1 mrg }; 415 1.1 mrg #endif 416 1.1 mrg 417 1.1 mrg /* An ExpressionTree is an expression DAG. */ 418 1.1 mrg class ExpressionTree 419 1.1 mrg { 420 1.1 mrg public: 421 1.1 mrg ExpressionTree () : m_num_vals (2) 422 1.1 mrg { 423 1.1 mrg m_exprs[0].m_produced_val = 0; 424 1.1 mrg m_exprs[1].m_produced_val = 1; 425 1.1 mrg } 426 1.1 mrg 427 1.1 mrg void copy_technique_from (ExpressionTree * other) 428 1.1 mrg { 429 1.1 mrg /* TODO: write this; other can compute the same products with less 430 1.1 mrg cost. We update this ExpressionTree object because some int is 431 1.1 mrg already mapped to it. */ 432 1.1 mrg } 433 1.1 mrg 434 1.1 mrg int m_num_vals; 435 1.1 mrg Expr m_exprs[MAX_SUBEXPRS]; 436 1.1 mrg 437 1.1 mrg int cost () const 438 1.1 mrg { 439 1.1 mrg int cost = 0; 440 1.1 mrg for (int j = 2; j < m_num_vals; j++) 441 1.1 mrg cost += m_exprs[j].m_op->m_cost; 442 1.1 mrg return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000; 443 1.1 mrg } 444 1.1 mrg }; 445 1.1 mrg 446 1.1 mrg 447 1.1 mrg typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap; 448 1.1 mrg 449 1.1 mrg 450 1.1 mrg static void 451 1.1 mrg find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution) 452 1.1 mrg { 453 1.1 mrg /* Don't look more if we can't add any new values to the expression 454 1.1 mrg tree. */ 455 1.1 mrg const int num_vals = s.m_num_vals; 456 1.1 mrg if (num_vals == MAX_SUBEXPRS) 457 1.1 mrg return; 458 1.1 mrg 459 1.1 mrg /* Grow array to make room for new values added as we loop. */ 460 1.1 mrg s.m_num_vals = num_vals + 1; 461 1.1 mrg 462 1.1 mrg const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op; 463 1.1 mrg const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1; 464 1.1 mrg 465 1.1 mrg for (size_t f = 0; f < sizeof ops / sizeof ops[0]; f++) 466 1.1 mrg { 467 1.1 mrg const Operator *const op = &ops[f]; 468 1.1 mrg 469 1.1 mrg for (int i = 0; i < num_vals; i++) 470 1.1 mrg { 471 1.1 mrg /* Only allow zero as the first operand to sub, otherwise 472 1.1 mrg it is useless. */ 473 1.1 mrg if (i == 0 && op->m_binary_func != sub) 474 1.1 mrg continue; 475 1.1 mrg 476 1.1 mrg /* Unary ops don't actually use the second operand, so as a 477 1.1 mrg big hack we trick it into only looping once in the inner 478 1.1 mrg loop. */ 479 1.1 mrg const int j_end = op->is_unary () ? 2 : num_vals; 480 1.1 mrg 481 1.1 mrg /* We never allow zero as the second operand, as it is 482 1.1 mrg always useless. */ 483 1.1 mrg for (int j = 1; j < j_end; j++) 484 1.1 mrg { 485 1.1 mrg /* Does this expression use the immediately previous 486 1.1 mrg expression? */ 487 1.1 mrg const bool uses_prev_value = 488 1.1 mrg (i == num_vals - 1 489 1.1 mrg || (!op->is_unary () && j == num_vals - 1)); 490 1.1 mrg 491 1.1 mrg if (!uses_prev_value) 492 1.1 mrg { 493 1.1 mrg /* For search efficiency, prune redundant 494 1.1 mrg instruction orderings. 495 1.1 mrg 496 1.1 mrg This op does not take the immediately preceding 497 1.1 mrg value as input, which means we could have done it 498 1.1 mrg in the previous slot. If its opcode is less than 499 1.1 mrg the previous instruction's, we'll declare this 500 1.1 mrg instruction order non-canonical and not pursue 501 1.1 mrg this branch of the search. */ 502 1.1 mrg if (op->m_top_index < prev_top_index) 503 1.1 mrg continue; 504 1.1 mrg } 505 1.1 mrg 506 1.1 mrg MUL_TYPE n; 507 1.1 mrg if (op->is_unary ()) 508 1.1 mrg { 509 1.1 mrg n = op->m_unary_func (s.m_exprs[i].m_produced_val); 510 1.1 mrg } 511 1.1 mrg else 512 1.1 mrg { 513 1.1 mrg n = op->m_binary_func (s.m_exprs[i].m_produced_val, 514 1.1 mrg s.m_exprs[j].m_produced_val); 515 1.1 mrg } 516 1.1 mrg 517 1.1 mrg bool duplicate = false; 518 1.1 mrg for (int k = 0; k < num_vals; k++) 519 1.1 mrg if (n == s.m_exprs[j].m_produced_val) 520 1.1 mrg { 521 1.1 mrg duplicate = true; 522 1.1 mrg break; 523 1.1 mrg } 524 1.1 mrg 525 1.1 mrg if (duplicate) 526 1.1 mrg continue; 527 1.1 mrg 528 1.1 mrg /* See if we found the best solution for n. */ 529 1.1 mrg Expr *e = &s.m_exprs[num_vals]; 530 1.1 mrg e->m_op = op; 531 1.1 mrg e->m_lhs = i; 532 1.1 mrg e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j; 533 1.1 mrg e->m_produced_val = n; 534 1.1 mrg e->m_critical_path_length = 535 1.1 mrg 1 + MAX (s.m_exprs[i].m_critical_path_length, 536 1.1 mrg s.m_exprs[j].m_critical_path_length); 537 1.1 mrg 538 1.1 mrg ExpressionTreeMap::iterator best (best_solution.find (n)); 539 1.1 mrg if (best == best_solution.end () 540 1.1 mrg || (*best).second->cost () > s.cost ()) 541 1.1 mrg best_solution[n] = new ExpressionTree (s); 542 1.1 mrg 543 1.1 mrg /* Recurse and find more. */ 544 1.1 mrg find_sequences (s, best_solution); 545 1.1 mrg } 546 1.1 mrg } 547 1.1 mrg } 548 1.1 mrg 549 1.1 mrg /* Restore old size. */ 550 1.1 mrg s.m_num_vals = num_vals; 551 1.1 mrg } 552 1.1 mrg 553 1.1 mrg 554 1.1 mrg /* For each insn_code used by an operator, choose a compact number so 555 1.1 mrg it takes less space in the output data structure. This prints out a 556 1.1 mrg lookup table used to map the compactified number back to an 557 1.1 mrg insn_code. */ 558 1.1 mrg static void 559 1.1 mrg create_insn_code_compression_table () 560 1.1 mrg { 561 1.1 mrg int next_index = 1; 562 1.1 mrg 563 1.1 mrg /* Entry 0 must hold CODE_FOR_nothing to mean "end of array". */ 564 1.1 mrg printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n" 565 1.1 mrg " CODE_FOR_nothing /* must be first */ ", ARCH); 566 1.1 mrg 567 1.1 mrg for (size_t i = 0; i < sizeof ops / sizeof ops[0]; i++) 568 1.1 mrg { 569 1.1 mrg Operator *op = &ops[i]; 570 1.1 mrg int index = -1; 571 1.1 mrg 572 1.1 mrg /* See if some previous Operator was using the same insn_code. 573 1.1 mrg If so, reuse its table entry. */ 574 1.1 mrg for (size_t j = 0; j < i; j++) 575 1.1 mrg { 576 1.1 mrg Operator *old = &ops[j]; 577 1.1 mrg if (strcmp (old->m_pattern, op->m_pattern) == 0) 578 1.1 mrg { 579 1.1 mrg index = old->m_top_index; 580 1.1 mrg break; 581 1.1 mrg } 582 1.1 mrg } 583 1.1 mrg 584 1.1 mrg if (index == -1) 585 1.1 mrg { 586 1.1 mrg /* We need to make a new entry in the table. */ 587 1.1 mrg printf (",\n %s", op->m_pattern); 588 1.1 mrg index = next_index++; 589 1.1 mrg } 590 1.1 mrg 591 1.1 mrg op->m_top_index = index; 592 1.1 mrg } 593 1.1 mrg 594 1.1 mrg printf ("\n};\n\n"); 595 1.1 mrg } 596 1.1 mrg 597 1.1 mrg 598 1.1 mrg /* These are constants we've seen in code, that we want to keep table 599 1.1 mrg entries for. */ 600 1.1 mrg static int multiply_constants_used[] = { 601 1.1 mrg -11796480, 602 1.1 mrg -27439, 603 1.1 mrg -25148, 604 1.1 mrg -22820, 605 1.1 mrg -21709, 606 1.1 mrg -20995, 607 1.1 mrg -20284, 608 1.1 mrg -20239, 609 1.1 mrg -19595, 610 1.1 mrg -19447, 611 1.1 mrg -19183, 612 1.1 mrg -19165, 613 1.1 mrg -18730, 614 1.1 mrg -17828, 615 1.1 mrg -17799, 616 1.1 mrg -17237, 617 1.1 mrg -17036, 618 1.1 mrg -16549, 619 1.1 mrg -16423, 620 1.1 mrg -16294, 621 1.1 mrg -16244, 622 1.1 mrg -16069, 623 1.1 mrg -15137, 624 1.1 mrg -15083, 625 1.1 mrg -15038, 626 1.1 mrg -14924, 627 1.1 mrg -14905, 628 1.1 mrg -14752, 629 1.1 mrg -14731, 630 1.1 mrg -14529, 631 1.1 mrg -14273, 632 1.1 mrg -14090, 633 1.1 mrg -14084, 634 1.1 mrg -14043, 635 1.1 mrg -13850, 636 1.1 mrg -13802, 637 1.1 mrg -13631, 638 1.1 mrg -13455, 639 1.1 mrg -13275, 640 1.1 mrg -12879, 641 1.1 mrg -12700, 642 1.1 mrg -12534, 643 1.1 mrg -12399, 644 1.1 mrg -12131, 645 1.1 mrg -12112, 646 1.1 mrg -12054, 647 1.1 mrg -12019, 648 1.1 mrg -11759, 649 1.1 mrg -11585, 650 1.1 mrg -11467, 651 1.1 mrg -11395, 652 1.1 mrg -11295, 653 1.1 mrg -11248, 654 1.1 mrg -11148, 655 1.1 mrg -11116, 656 1.1 mrg -11086, 657 1.1 mrg -11059, 658 1.1 mrg -11018, 659 1.1 mrg -10811, 660 1.1 mrg -10538, 661 1.1 mrg -10258, 662 1.1 mrg -10217, 663 1.1 mrg -10033, 664 1.1 mrg -9766, 665 1.1 mrg -9754, 666 1.1 mrg -9534, 667 1.1 mrg -9527, 668 1.1 mrg -9467, 669 1.1 mrg -9262, 670 1.1 mrg -9232, 671 1.1 mrg -9222, 672 1.1 mrg -9198, 673 1.1 mrg -9191, 674 1.1 mrg -9113, 675 1.1 mrg -8825, 676 1.1 mrg -8756, 677 1.1 mrg -8697, 678 1.1 mrg -8693, 679 1.1 mrg -8565, 680 1.1 mrg -8342, 681 1.1 mrg -8208, 682 1.1 mrg -8200, 683 1.1 mrg -8170, 684 1.1 mrg -8102, 685 1.1 mrg -7770, 686 1.1 mrg -7678, 687 1.1 mrg -7562, 688 1.1 mrg -7376, 689 1.1 mrg -7373, 690 1.1 mrg -7221, 691 1.1 mrg -7121, 692 1.1 mrg -6835, 693 1.1 mrg -6810, 694 1.1 mrg -6626, 695 1.1 mrg -6581, 696 1.1 mrg -6461, 697 1.1 mrg -6278, 698 1.1 mrg -6263, 699 1.1 mrg -6163, 700 1.1 mrg -6029, 701 1.1 mrg -5816, 702 1.1 mrg -5540, 703 1.1 mrg -5461, 704 1.1 mrg -5384, 705 1.1 mrg -5329, 706 1.1 mrg -4985, 707 1.1 mrg -4926, 708 1.1 mrg -4815, 709 1.1 mrg -4788, 710 1.1 mrg -4758, 711 1.1 mrg -4433, 712 1.1 mrg -4229, 713 1.1 mrg -4209, 714 1.1 mrg -4176, 715 1.1 mrg -4104, 716 1.1 mrg -4095, 717 1.1 mrg -4078, 718 1.1 mrg -3941, 719 1.1 mrg -3818, 720 1.1 mrg -3600, 721 1.1 mrg -3474, 722 1.1 mrg -3314, 723 1.1 mrg -3264, 724 1.1 mrg -3196, 725 1.1 mrg -3072, 726 1.1 mrg -2912, 727 1.1 mrg -2836, 728 1.1 mrg -2773, 729 1.1 mrg -2269, 730 1.1 mrg -2184, 731 1.1 mrg -2100, 732 1.1 mrg -1730, 733 1.1 mrg -1512, 734 1.1 mrg -1500, 735 1.1 mrg -1396, 736 1.1 mrg -1344, 737 1.1 mrg -1312, 738 1.1 mrg -1297, 739 1.1 mrg -1059, 740 1.1 mrg -1058, 741 1.1 mrg 1027, 742 1.1 mrg 1049, 743 1.1 mrg 1059, 744 1.1 mrg 1100, 745 1.1 mrg 1104, 746 1.1 mrg 1108, 747 1.1 mrg 1136, 748 1.1 mrg 1200, 749 1.1 mrg 1204, 750 1.1 mrg 1242, 751 1.1 mrg 1292, 752 1.1 mrg 1304, 753 1.1 mrg 1312, 754 1.1 mrg 1320, 755 1.1 mrg 1336, 756 1.1 mrg 1344, 757 1.1 mrg 1348, 758 1.1 mrg 1360, 759 1.1 mrg 1364, 760 1.1 mrg 1395, 761 1.1 mrg 1448, 762 1.1 mrg 1460, 763 1.1 mrg 1461, 764 1.1 mrg 1472, 765 1.1 mrg 1488, 766 1.1 mrg 1500, 767 1.1 mrg 1512, 768 1.1 mrg 1568, 769 1.1 mrg 1576, 770 1.1 mrg 1649, 771 1.1 mrg 1664, 772 1.1 mrg 1684, 773 1.1 mrg 1696, 774 1.1 mrg 1744, 775 1.1 mrg 1812, 776 1.1 mrg 1822, 777 1.1 mrg 1884, 778 1.1 mrg 1963, 779 1.1 mrg 1978, 780 1.1 mrg 2000, 781 1.1 mrg 2012, 782 1.1 mrg 2014, 783 1.1 mrg 2037, 784 1.1 mrg 2039, 785 1.1 mrg 2100, 786 1.1 mrg 2139, 787 1.1 mrg 2144, 788 1.1 mrg 2184, 789 1.1 mrg 2237, 790 1.1 mrg 2260, 791 1.1 mrg 2320, 792 1.1 mrg 2408, 793 1.1 mrg 2446, 794 1.1 mrg 2447, 795 1.1 mrg 2499, 796 1.1 mrg 2531, 797 1.1 mrg 2578, 798 1.1 mrg 2592, 799 1.1 mrg 2611, 800 1.1 mrg 2633, 801 1.1 mrg 2704, 802 1.1 mrg 2730, 803 1.1 mrg 2773, 804 1.1 mrg 2880, 805 1.1 mrg 2896, 806 1.1 mrg 2998, 807 1.1 mrg 3000, 808 1.1 mrg 3001, 809 1.1 mrg 3021, 810 1.1 mrg 3079, 811 1.1 mrg 3112, 812 1.1 mrg 3150, 813 1.1 mrg 3179, 814 1.1 mrg 3192, 815 1.1 mrg 3240, 816 1.1 mrg 3264, 817 1.1 mrg 3271, 818 1.1 mrg 3283, 819 1.1 mrg 3328, 820 1.1 mrg 3363, 821 1.1 mrg 3367, 822 1.1 mrg 3453, 823 1.1 mrg 3529, 824 1.1 mrg 3570, 825 1.1 mrg 3580, 826 1.1 mrg 3600, 827 1.1 mrg 3624, 828 1.1 mrg 3707, 829 1.1 mrg 3783, 830 1.1 mrg 3826, 831 1.1 mrg 3897, 832 1.1 mrg 3941, 833 1.1 mrg 3962, 834 1.1 mrg 3989, 835 1.1 mrg 4000, 836 1.1 mrg 4025, 837 1.1 mrg 4073, 838 1.1 mrg 4093, 839 1.1 mrg 4099, 840 1.1 mrg 4108, 841 1.1 mrg 4184, 842 1.1 mrg 4209, 843 1.1 mrg 4369, 844 1.1 mrg 4376, 845 1.1 mrg 4416, 846 1.1 mrg 4433, 847 1.1 mrg 4434, 848 1.1 mrg 4482, 849 1.1 mrg 4582, 850 1.1 mrg 4712, 851 1.1 mrg 4717, 852 1.1 mrg 4813, 853 1.1 mrg 4815, 854 1.1 mrg 4864, 855 1.1 mrg 5000, 856 1.1 mrg 5027, 857 1.1 mrg 5040, 858 1.1 mrg 5091, 859 1.1 mrg 5195, 860 1.1 mrg 5243, 861 1.1 mrg 5260, 862 1.1 mrg 5285, 863 1.1 mrg 5329, 864 1.1 mrg 5331, 865 1.1 mrg 5350, 866 1.1 mrg 5361, 867 1.1 mrg 5387, 868 1.1 mrg 5461, 869 1.1 mrg 5492, 870 1.1 mrg 5529, 871 1.1 mrg 5573, 872 1.1 mrg 5793, 873 1.1 mrg 5819, 874 1.1 mrg 5915, 875 1.1 mrg 5946, 876 1.1 mrg 5992, 877 1.1 mrg 6000, 878 1.1 mrg 6164, 879 1.1 mrg 6205, 880 1.1 mrg 6262, 881 1.1 mrg 6263, 882 1.1 mrg 6269, 883 1.1 mrg 6270, 884 1.1 mrg 6387, 885 1.1 mrg 6400, 886 1.1 mrg 6406, 887 1.1 mrg 6476, 888 1.1 mrg 6541, 889 1.1 mrg 6565, 890 1.1 mrg 6568, 891 1.1 mrg 6626, 892 1.1 mrg 6656, 893 1.1 mrg 6732, 894 1.1 mrg 6810, 895 1.1 mrg 6817, 896 1.1 mrg 6859, 897 1.1 mrg 7040, 898 1.1 mrg 7053, 899 1.1 mrg 7141, 900 1.1 mrg 7169, 901 1.1 mrg 7221, 902 1.1 mrg 7223, 903 1.1 mrg 7274, 904 1.1 mrg 7282, 905 1.1 mrg 7350, 906 1.1 mrg 7369, 907 1.1 mrg 7373, 908 1.1 mrg 7442, 909 1.1 mrg 7447, 910 1.1 mrg 7471, 911 1.1 mrg 7518, 912 1.1 mrg 7542, 913 1.1 mrg 7566, 914 1.1 mrg 7587, 915 1.1 mrg 7663, 916 1.1 mrg 7678, 917 1.1 mrg 7682, 918 1.1 mrg 7748, 919 1.1 mrg 7752, 920 1.1 mrg 7791, 921 1.1 mrg 8000, 922 1.1 mrg 8026, 923 1.1 mrg 8048, 924 1.1 mrg 8170, 925 1.1 mrg 8203, 926 1.1 mrg 8204, 927 1.1 mrg 8290, 928 1.1 mrg 8368, 929 1.1 mrg 8520, 930 1.1 mrg 8640, 931 1.1 mrg 8666, 932 1.1 mrg 8672, 933 1.1 mrg 8697, 934 1.1 mrg 8716, 935 1.1 mrg 8728, 936 1.1 mrg 8756, 937 1.1 mrg 8820, 938 1.1 mrg 8875, 939 1.1 mrg 8918, 940 1.1 mrg 8956, 941 1.1 mrg 9058, 942 1.1 mrg 9154, 943 1.1 mrg 9175, 944 1.1 mrg 9191, 945 1.1 mrg 9217, 946 1.1 mrg 9262, 947 1.1 mrg 9321, 948 1.1 mrg 9373, 949 1.1 mrg 9434, 950 1.1 mrg 9465, 951 1.1 mrg 9514, 952 1.1 mrg 9534, 953 1.1 mrg 9633, 954 1.1 mrg 9746, 955 1.1 mrg 9810, 956 1.1 mrg 9850, 957 1.1 mrg 9947, 958 1.1 mrg 9973, 959 1.1 mrg 10000, 960 1.1 mrg 10009, 961 1.1 mrg 10033, 962 1.1 mrg 10055, 963 1.1 mrg 10217, 964 1.1 mrg 10248, 965 1.1 mrg 10298, 966 1.1 mrg 10310, 967 1.1 mrg 10323, 968 1.1 mrg 10368, 969 1.1 mrg 10438, 970 1.1 mrg 10456, 971 1.1 mrg 10486, 972 1.1 mrg 10538, 973 1.1 mrg 10664, 974 1.1 mrg 10695, 975 1.1 mrg 10700, 976 1.1 mrg 10703, 977 1.1 mrg 10832, 978 1.1 mrg 10887, 979 1.1 mrg 10935, 980 1.1 mrg 10958, 981 1.1 mrg 11018, 982 1.1 mrg 11059, 983 1.1 mrg 11061, 984 1.1 mrg 11086, 985 1.1 mrg 11116, 986 1.1 mrg 11148, 987 1.1 mrg 11190, 988 1.1 mrg 11249, 989 1.1 mrg 11314, 990 1.1 mrg 11332, 991 1.1 mrg 11363, 992 1.1 mrg 11409, 993 1.1 mrg 11415, 994 1.1 mrg 11443, 995 1.1 mrg 11467, 996 1.1 mrg 11512, 997 1.1 mrg 11522, 998 1.1 mrg 11529, 999 1.1 mrg 11585, 1000 1.1 mrg 11759, 1001 1.1 mrg 11768, 1002 1.1 mrg 11795, 1003 1.1 mrg 11893, 1004 1.1 mrg 11997, 1005 1.1 mrg 12131, 1006 1.1 mrg 12299, 1007 1.1 mrg 12536, 1008 1.1 mrg 12543, 1009 1.1 mrg 12893, 1010 1.1 mrg 12945, 1011 1.1 mrg 12998, 1012 1.1 mrg 13109, 1013 1.1 mrg 13213, 1014 1.1 mrg 13685, 1015 1.1 mrg 13930, 1016 1.1 mrg 14023, 1017 1.1 mrg 14024, 1018 1.1 mrg 14271, 1019 1.1 mrg 14564, 1020 1.1 mrg 14647, 1021 1.1 mrg 15326, 1022 1.1 mrg 15850, 1023 1.1 mrg 15855, 1024 1.1 mrg 15929, 1025 1.1 mrg 16000, 1026 1.1 mrg 16154, 1027 1.1 mrg 16496, 1028 1.1 mrg 16807, 1029 1.1 mrg 16819, 1030 1.1 mrg 16984, 1031 1.1 mrg 17203, 1032 1.1 mrg 17223, 1033 1.1 mrg 17333, 1034 1.1 mrg 17760, 1035 1.1 mrg 17799, 1036 1.1 mrg 17837, 1037 1.1 mrg 18029, 1038 1.1 mrg 18068, 1039 1.1 mrg 18336, 1040 1.1 mrg 18515, 1041 1.1 mrg 19595, 1042 1.1 mrg 20017, 1043 1.1 mrg 20131, 1044 1.1 mrg 20862, 1045 1.1 mrg 20995, 1046 1.1 mrg 21709, 1047 1.1 mrg 22554, 1048 1.1 mrg 25000, 1049 1.1 mrg 25172, 1050 1.1 mrg 25600, 1051 1.1 mrg 25733, 1052 1.1 mrg 27439, 1053 1.1 mrg 38470, 1054 1.1 mrg 46802, 1055 1.1 mrg 50000, 1056 1.1 mrg 11796480, 1057 1.1 mrg 16843009, 1058 1.1 mrg 23592960, 1059 1.1 mrg }; 1060 1.1 mrg 1061 1.1 mrg 1062 1.1 mrg const int num_mult_constants_used = 1063 1.1 mrg (int)(sizeof multiply_constants_used 1064 1.1 mrg / sizeof multiply_constants_used[0]); 1065 1.1 mrg 1066 1.1 mrg 1067 1.1 mrg #define XSIZE (sizeof multiply_constants_used / \ 1068 1.1 mrg sizeof multiply_constants_used[0] + 32) / 32 1069 1.1 mrg unsigned multiply_constants_avail[XSIZE]; 1070 1.1 mrg #undef XSIZE 1071 1.1 mrg 1072 1.1 mrg 1073 1.1 mrg /* bsearch helper function. */ 1074 1.1 mrg static int 1075 1.1 mrg compare_constants (const void *key, const void *t) 1076 1.1 mrg { 1077 1.1 mrg return (*(int*)key) - *((int*)t); 1078 1.1 mrg } 1079 1.1 mrg 1080 1.1 mrg 1081 1.1 mrg static int * 1082 1.1 mrg find_mult_constants_used (int multiplier) 1083 1.1 mrg { 1084 1.1 mrg return (int *) bsearch (&multiplier, multiply_constants_used, 1085 1.1 mrg num_mult_constants_used, 1086 1.1 mrg sizeof multiply_constants_used[0], 1087 1.1 mrg compare_constants); 1088 1.1 mrg } 1089 1.1 mrg 1090 1.1 mrg 1091 1.1 mrg int num_ops (ExpressionTree *s) 1092 1.1 mrg { 1093 1.1 mrg int n = 0; 1094 1.1 mrg for (int i = 0; i < s->m_num_vals; i++) 1095 1.1 mrg { 1096 1.1 mrg Expr *e = &s->m_exprs[i]; 1097 1.1 mrg if (e->m_op != NULL) 1098 1.1 mrg n++; 1099 1.1 mrg } 1100 1.1 mrg return n; 1101 1.1 mrg } 1102 1.1 mrg 1103 1.1 mrg 1104 1.1 mrg #ifdef TILEPRO 1105 1.1 mrg bool 1106 1.1 mrg tilepro_emit (int multiplier, int num_ops) 1107 1.1 mrg { 1108 1.1 mrg int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier; 1109 1.1 mrg 1110 1.1 mrg /* Keep constants in range [-1024, 1024]. */ 1111 1.1 mrg if (abs_multiplier <= 1024) 1112 1.1 mrg return true; 1113 1.1 mrg 1114 1.1 mrg /* Keep constants near powers of two. */ 1115 1.1 mrg int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier)); 1116 1.1 mrg int next_pow2 = prev_pow2 << 1; 1117 1.1 mrg 1118 1.1 mrg if ((abs_multiplier - prev_pow2 <= 10) 1119 1.1 mrg || (next_pow2 - abs_multiplier <= 10)) 1120 1.1 mrg return true; 1121 1.1 mrg 1122 1.1 mrg /* Keep constants near powers of ten. */ 1123 1.1 mrg { 1124 1.1 mrg long long j = 1; 1125 1.1 mrg long long prev_pow10; 1126 1.1 mrg long long next_pow10; 1127 1.1 mrg 1128 1.1 mrg while (((j * 10) < abs_multiplier) 1129 1.1 mrg && (j < (j * 10))) 1130 1.1 mrg j = j * 10; 1131 1.1 mrg 1132 1.1 mrg prev_pow10 = j; 1133 1.1 mrg next_pow10 = j * 10; 1134 1.1 mrg 1135 1.1 mrg if ((abs_multiplier - prev_pow10 <= 10) 1136 1.1 mrg || (next_pow10 - abs_multiplier <= 10)) 1137 1.1 mrg return true; 1138 1.1 mrg } 1139 1.1 mrg 1140 1.1 mrg /* Keep short sequences that have two or fewer ops. */ 1141 1.1 mrg if (num_ops <= 2) 1142 1.1 mrg return true; 1143 1.1 mrg 1144 1.1 mrg /* Keep constants that are mostly 0's or mostly 1's. */ 1145 1.1 mrg if (__builtin_popcount (multiplier) <= 2 || 1146 1.1 mrg __builtin_popcount (multiplier) >= 30) 1147 1.1 mrg return true; 1148 1.1 mrg 1149 1.1 mrg /* Keep constants seen in actual code. */ 1150 1.1 mrg if ((find_mult_constants_used (multiplier))) 1151 1.1 mrg return true; 1152 1.1 mrg 1153 1.1 mrg return false; 1154 1.1 mrg } 1155 1.1 mrg #else 1156 1.1 mrg bool 1157 1.1 mrg tilegx_emit (long long multiplier, int num_ops) 1158 1.1 mrg { 1159 1.1 mrg long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier; 1160 1.1 mrg 1161 1.1 mrg /* Keep constants in range [-1024, 1024]. */ 1162 1.1 mrg if (abs_multiplier <= 1024) 1163 1.1 mrg return true; 1164 1.1 mrg 1165 1.1 mrg /* Otherwise exclude sequences with four ops. */ 1166 1.1 mrg if (num_ops > 3) 1167 1.1 mrg return false; 1168 1.1 mrg 1169 1.1 mrg /* Keep constants near powers of two. */ 1170 1.1 mrg { 1171 1.1 mrg unsigned long long prev_pow2 = 1172 1.1 mrg 1LL << (63 - __builtin_clzll (abs_multiplier)); 1173 1.1 mrg unsigned long long next_pow2 = prev_pow2 << 1; 1174 1.1 mrg 1175 1.1 mrg /* handle overflow case. */ 1176 1.1 mrg if (next_pow2 == 0) 1177 1.1 mrg { 1178 1.1 mrg if (prev_pow2 - abs_multiplier <= 10) 1179 1.1 mrg return true; 1180 1.1 mrg } 1181 1.1 mrg else if ((abs_multiplier - prev_pow2 <= 10) 1182 1.1 mrg || (next_pow2 - abs_multiplier <= 10)) 1183 1.1 mrg return true; 1184 1.1 mrg } 1185 1.1 mrg 1186 1.1 mrg /* Keep constants near powers of ten. */ 1187 1.1 mrg { 1188 1.1 mrg long long j = 1; 1189 1.1 mrg long long prev_pow10; 1190 1.1 mrg long long next_pow10; 1191 1.1 mrg 1192 1.1 mrg while (((j * 10) < abs_multiplier) 1193 1.9 mrg && (j < (j * 10))) 1194 1.1 mrg j = j * 10; 1195 1.1 mrg 1196 1.1 mrg prev_pow10 = j; 1197 1.9 mrg next_pow10 = j * 10; 1198 1.1 mrg 1199 1.1 mrg if ((abs_multiplier - prev_pow10 <= 100) 1200 1.1 mrg || (next_pow10 1201 1.1 mrg && (next_pow10 - abs_multiplier <= 100))) 1202 1.1 mrg return true; 1203 1.1 mrg } 1204 1.1 mrg 1205 1.1 mrg if (num_ops <= 2) 1206 1.1 mrg return true; 1207 1.1 mrg 1208 1.1 mrg /* Keep constants that are mostly 0's or mostly 1's. */ 1209 1.1 mrg if (__builtin_popcountll (multiplier) <= 2 || 1210 1.1 mrg __builtin_popcountll (multiplier) >= 62) 1211 1.1 mrg return true; 1212 1.1 mrg 1213 1.1 mrg /* Keep constants seen in actual code. */ 1214 1.1 mrg if (find_mult_constants_used (multiplier)) 1215 1.1 mrg return true; 1216 1.1 mrg 1217 1.1 mrg return false; 1218 1.1 mrg } 1219 1.1 mrg #endif 1220 1.1 mrg 1221 1.1 mrg 1222 1.1 mrg int 1223 1.1 mrg main () 1224 1.1 mrg { 1225 1.1 mrg ExpressionTreeMap best_solution; 1226 1.1 mrg ExpressionTree s; 1227 1.1 mrg 1228 1.1 mrg #ifdef TILEPRO 1229 1.1 mrg printf ("/* Constant multiply table for TILEPro.\n"); 1230 1.1 mrg #else 1231 1.1 mrg printf ("/* Constant multiply table for TILE-Gx.\n"); 1232 1.1 mrg #endif 1233 1.10 mrg printf (" Copyright (C) 2011-2022 Free Software Foundation, Inc.\n"); 1234 1.1 mrg printf (" Contributed by Walter Lee (walt (at) tilera.com)\n"); 1235 1.1 mrg printf ("\n"); 1236 1.1 mrg printf (" This file is part of GCC.\n"); 1237 1.1 mrg printf ("\n"); 1238 1.1 mrg printf (" GCC is free software; you can redistribute it and/or modify it\n"); 1239 1.1 mrg printf (" under the terms of the GNU General Public License as published\n"); 1240 1.1 mrg printf (" by the Free Software Foundation; either version 3, or (at your\n"); 1241 1.1 mrg printf (" option) any later version.\n"); 1242 1.1 mrg printf ("\n"); 1243 1.1 mrg printf (" GCC is distributed in the hope that it will be useful, but WITHOUT\n"); 1244 1.1 mrg printf (" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n"); 1245 1.1 mrg printf (" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public\n"); 1246 1.1 mrg printf (" License for more details.\n"); 1247 1.1 mrg printf ("\n"); 1248 1.1 mrg printf (" You should have received a copy of the GNU General Public License\n"); 1249 1.1 mrg printf (" along with GCC; see the file COPYING3. If not see\n"); 1250 1.1 mrg printf (" <http://www.gnu.org/licenses/>. */\n"); 1251 1.1 mrg printf ("\n"); 1252 1.3 mrg printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n"); 1253 1.3 mrg printf (" Make any required changes there. */\n"); 1254 1.3 mrg printf ("\n"); 1255 1.10 mrg printf ("#define IN_TARGET_CODE 1\n"); 1256 1.10 mrg printf ("\n"); 1257 1.1 mrg printf ("#include \"config.h\"\n"); 1258 1.1 mrg printf ("#include \"system.h\"\n"); 1259 1.1 mrg printf ("#include \"coretypes.h\"\n"); 1260 1.4 mrg printf ("#include \"backend.h\"\n"); 1261 1.3 mrg printf ("#include \"rtl.h\"\n"); 1262 1.3 mrg printf ("#include \"expmed.h\"\n"); 1263 1.1 mrg printf ("#include \"%s-multiply.h\"\n\n", ARCH); 1264 1.1 mrg create_insn_code_compression_table (); 1265 1.1 mrg 1266 1.1 mrg /* Try all combinations of operators and see what constants we 1267 1.1 mrg produce. For each possible constant, record the most efficient 1268 1.1 mrg way to generate it. */ 1269 1.1 mrg find_sequences (s, best_solution); 1270 1.1 mrg 1271 1.1 mrg printf ("const struct %s_multiply_insn_seq " 1272 1.1 mrg "%s_multiply_insn_seq_table[] = {\n", 1273 1.1 mrg ARCH, ARCH); 1274 1.1 mrg 1275 1.1 mrg const char *comma_separator = ""; 1276 1.1 mrg 1277 1.1 mrg ExpressionTreeMap::iterator i (best_solution.begin ()); 1278 1.1 mrg for (; i != best_solution.end (); ++i) 1279 1.1 mrg { 1280 1.1 mrg ExpressionTree *s = (*i).second; 1281 1.1 mrg const MUL_TYPE n = (*i).first; 1282 1.1 mrg 1283 1.1 mrg if (n == 0 || n == 1) 1284 1.1 mrg { 1285 1.1 mrg /* Both of these require zero operations, so these entries 1286 1.1 mrg are bogus and should never be used. */ 1287 1.1 mrg continue; 1288 1.1 mrg } 1289 1.1 mrg 1290 1.1 mrg /* Prune the list of entries to keep the table to a reasonable 1291 1.1 mrg size. */ 1292 1.1 mrg #ifdef TILEPRO 1293 1.1 mrg if (!tilepro_emit (n, num_ops (s))) 1294 1.1 mrg continue; 1295 1.1 mrg #else 1296 1.1 mrg if (!tilegx_emit (n, num_ops (s))) 1297 1.1 mrg continue; 1298 1.1 mrg #endif 1299 1.1 mrg 1300 1.1 mrg printf ("%s", comma_separator); 1301 1.1 mrg 1302 1.1 mrg #ifdef TILEPRO 1303 1.1 mrg const MUL_TYPE int_min = INT32_MIN; 1304 1.1 mrg #else 1305 1.1 mrg const MUL_TYPE int_min = INT64_MIN; 1306 1.1 mrg #endif 1307 1.1 mrg if (n == int_min) 1308 1.1 mrg { 1309 1.1 mrg /* Handle C's woeful lack of unary negation. Without this, 1310 1.1 mrg printing out INT_MIN in decimal will yield an unsigned 1311 1.1 mrg int which could generate a compiler warning. */ 1312 1.1 mrg #ifdef TILEPRO 1313 1.1 mrg printf (" {%d - 1 /* 0x%x */ ,\n {", n + 1, 1314 1.1 mrg (unsigned) n); 1315 1.1 mrg #else 1316 1.1 mrg printf (" {%lldll - 1 /* 0x%llx */ ,\n {", n + 1, 1317 1.1 mrg (unsigned MUL_TYPE) n); 1318 1.1 mrg #endif 1319 1.1 mrg } 1320 1.1 mrg else 1321 1.1 mrg { 1322 1.1 mrg #ifdef TILEPRO 1323 1.1 mrg printf (" {%d /* 0x%x */ ,\n {", n, (unsigned) n); 1324 1.1 mrg #else 1325 1.1 mrg printf (" {%lldll /* 0x%llx */ ,\n {", n, (unsigned MUL_TYPE) n); 1326 1.1 mrg #endif 1327 1.1 mrg } 1328 1.1 mrg 1329 1.1 mrg bool first = true; 1330 1.1 mrg for (int j = 0; j < s->m_num_vals; j++) 1331 1.1 mrg { 1332 1.1 mrg Expr *e = &s->m_exprs[j]; 1333 1.1 mrg 1334 1.1 mrg const Operator *op = e->m_op; 1335 1.1 mrg if (op == NULL) 1336 1.1 mrg continue; 1337 1.1 mrg 1338 1.1 mrg char buf[1024]; 1339 1.1 mrg snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s", 1340 1.1 mrg first ? "" : " ", 1341 1.1 mrg op->m_top_index, 1342 1.1 mrg e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ","); 1343 1.1 mrg 1344 1.1 mrg char opnd0[10]; 1345 1.1 mrg if (e->m_lhs) 1346 1.1 mrg snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs); 1347 1.1 mrg else 1348 1.1 mrg snprintf (opnd0, sizeof opnd0, "zero"); 1349 1.1 mrg 1350 1.1 mrg printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n", 1351 1.1 mrg buf, op->m_name, j, opnd0, 1352 1.1 mrg op->is_unary () ? "" : "r", e->m_rhs); 1353 1.1 mrg 1354 1.1 mrg first = false; 1355 1.1 mrg } 1356 1.1 mrg printf (" }"); 1357 1.1 mrg comma_separator = ",\n"; 1358 1.1 mrg } 1359 1.1 mrg 1360 1.1 mrg printf ("\n};\n\n"); 1361 1.1 mrg printf ("const int %s_multiply_insn_seq_table_size =\n" 1362 1.1 mrg " (int) (sizeof %s_multiply_insn_seq_table\n" 1363 1.1 mrg " / sizeof %s_multiply_insn_seq_table[0]);\n", 1364 1.1 mrg ARCH, ARCH, ARCH); 1365 1.1 mrg 1366 1.1 mrg return EXIT_SUCCESS; 1367 1.1 mrg } 1368