1 1.1 mrg /* Offload image generation tool for AMD GCN. 2 1.1 mrg 3 1.1 mrg Copyright (C) 2014-2022 Free Software Foundation, Inc. 4 1.1 mrg 5 1.1 mrg This file is part of GCC. 6 1.1 mrg 7 1.1 mrg GCC is free software; you can redistribute it and/or modify it 8 1.1 mrg under the terms of the GNU General Public License as published 9 1.1 mrg by the Free Software Foundation; either version 3, or (at your 10 1.1 mrg option) any later version. 11 1.1 mrg 12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT 13 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 1.1 mrg License for more details. 16 1.1 mrg 17 1.1 mrg You should have received a copy of the GNU General Public License 18 1.1 mrg along with GCC; see the file COPYING3. If not see 19 1.1 mrg <http://www.gnu.org/licenses/>. */ 20 1.1 mrg 21 1.1 mrg /* Munges GCN assembly into a C source file defining the GCN code as a 22 1.1 mrg string. 23 1.1 mrg 24 1.1 mrg This is not a complete assembler. We presume the source is well 25 1.1 mrg formed from the compiler and can die horribly if it is not. */ 26 1.1 mrg 27 1.1 mrg #include "config.h" 28 1.1 mrg #include "system.h" 29 1.1 mrg #include "coretypes.h" 30 1.1 mrg #include "obstack.h" 31 1.1 mrg #include "diagnostic.h" 32 1.1 mrg #include "intl.h" 33 1.1 mrg #include <libgen.h> 34 1.1 mrg #include "collect-utils.h" 35 1.1 mrg #include "gomp-constants.h" 36 1.1 mrg #include "simple-object.h" 37 1.1 mrg #include "elf.h" 38 1.1 mrg 39 1.1 mrg /* These probably won't (all) be in elf.h for a while. */ 40 1.1 mrg #undef EM_AMDGPU 41 1.1 mrg #define EM_AMDGPU 0xe0; 42 1.1 mrg 43 1.1 mrg #undef ELFOSABI_AMDGPU_HSA 44 1.1 mrg #define ELFOSABI_AMDGPU_HSA 64 45 1.1 mrg #undef ELFABIVERSION_AMDGPU_HSA_V3 46 1.1 mrg #define ELFABIVERSION_AMDGPU_HSA_V3 1 47 1.1 mrg #undef ELFABIVERSION_AMDGPU_HSA_V4 48 1.1 mrg #define ELFABIVERSION_AMDGPU_HSA_V4 2 49 1.1 mrg 50 1.1 mrg #undef EF_AMDGPU_MACH_AMDGCN_GFX803 51 1.1 mrg #define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a 52 1.1 mrg #undef EF_AMDGPU_MACH_AMDGCN_GFX900 53 1.1 mrg #define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c 54 1.1 mrg #undef EF_AMDGPU_MACH_AMDGCN_GFX906 55 1.1 mrg #define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f 56 1.1 mrg #undef EF_AMDGPU_MACH_AMDGCN_GFX908 57 1.1 mrg #define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30 58 1.1 mrg 59 1.1 mrg #define EF_AMDGPU_XNACK_V3 0x100 60 1.1 mrg #define EF_AMDGPU_SRAM_ECC_V3 0x200 61 1.1 mrg 62 1.1 mrg #define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask. */ 63 1.1 mrg #define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000 64 1.1 mrg #define EF_AMDGPU_FEATURE_XNACK_ANY_V4 0x100 65 1.1 mrg #define EF_AMDGPU_FEATURE_XNACK_OFF_V4 0x200 66 1.1 mrg #define EF_AMDGPU_FEATURE_XNACK_ON_V4 0x300 67 1.1 mrg 68 1.1 mrg #define EF_AMDGPU_FEATURE_SRAMECC_V4 0xc00 /* Mask. */ 69 1.1 mrg #define EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 0x000 70 1.1 mrg #define EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 0x400 71 1.1 mrg #define EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 0x800 72 1.1 mrg #define EF_AMDGPU_FEATURE_SRAMECC_ON_V4 0xc00 73 1.1 mrg 74 1.1 mrg #ifdef HAVE_GCN_ASM_V3_SYNTAX 75 1.1 mrg #define SET_XNACK_ON(VAR) VAR |= EF_AMDGPU_XNACK_V3 76 1.1 mrg #define SET_XNACK_OFF(VAR) VAR &= ~EF_AMDGPU_XNACK_V3 77 1.1 mrg #define TEST_XNACK(VAR) (VAR & EF_AMDGPU_XNACK_V3) 78 1.1 mrg 79 1.1 mrg #define SET_SRAM_ECC_ON(VAR) VAR |= EF_AMDGPU_SRAM_ECC_V3 80 1.1 mrg #define SET_SRAM_ECC_ANY(VAR) SET_SRAM_ECC_ON (VAR) 81 1.1 mrg #define SET_SRAM_ECC_OFF(VAR) VAR &= ~EF_AMDGPU_SRAM_ECC_V3 82 1.1 mrg #define SET_SRAM_ECC_UNSUPPORTED(VAR) SET_SRAM_ECC_OFF (VAR) 83 1.1 mrg #define TEST_SRAM_ECC_ANY(VAR) 0 /* Not supported. */ 84 1.1 mrg #define TEST_SRAM_ECC_ON(VAR) (VAR & EF_AMDGPU_SRAM_ECC_V3) 85 1.1 mrg #endif 86 1.1 mrg #ifdef HAVE_GCN_ASM_V4_SYNTAX 87 1.1 mrg #define SET_XNACK_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \ 88 1.1 mrg | EF_AMDGPU_FEATURE_XNACK_ON_V4) 89 1.1 mrg #define SET_XNACK_OFF(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \ 90 1.1 mrg | EF_AMDGPU_FEATURE_XNACK_OFF_V4) 91 1.1 mrg #define TEST_XNACK(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \ 92 1.1 mrg == EF_AMDGPU_FEATURE_XNACK_ON_V4) 93 1.1 mrg 94 1.1 mrg #define SET_SRAM_ECC_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \ 95 1.1 mrg | EF_AMDGPU_FEATURE_SRAMECC_ON_V4) 96 1.1 mrg #define SET_SRAM_ECC_ANY(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \ 97 1.1 mrg | EF_AMDGPU_FEATURE_SRAMECC_ANY_V4) 98 1.1 mrg #define SET_SRAM_ECC_OFF(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \ 99 1.1 mrg | EF_AMDGPU_FEATURE_SRAMECC_OFF_V4) 100 1.1 mrg #define SET_SRAM_ECC_UNSUPPORTED(VAR) \ 101 1.1 mrg VAR = ((VAR & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \ 102 1.1 mrg | EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4) 103 1.1 mrg #define TEST_SRAM_ECC_ANY(VAR) ((VAR & EF_AMDGPU_FEATURE_SRAMECC_V4) \ 104 1.1 mrg == EF_AMDGPU_FEATURE_SRAMECC_ANY_V4) 105 1.1 mrg #define TEST_SRAM_ECC_ON(VAR) ((VAR & EF_AMDGPU_FEATURE_SRAMECC_V4) \ 106 1.1 mrg == EF_AMDGPU_FEATURE_SRAMECC_ON_V4) 107 1.1 mrg #endif 108 1.1 mrg 109 1.1 mrg #ifndef R_AMDGPU_NONE 110 1.1 mrg #define R_AMDGPU_NONE 0 111 1.1 mrg #define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */ 112 1.1 mrg #define R_AMDGPU_ABS32_HI 2 /* (S + A) >> 32 */ 113 1.1 mrg #define R_AMDGPU_ABS64 3 /* S + A */ 114 1.1 mrg #define R_AMDGPU_REL32 4 /* S + A - P */ 115 1.1 mrg #define R_AMDGPU_REL64 5 /* S + A - P */ 116 1.1 mrg #define R_AMDGPU_ABS32 6 /* S + A */ 117 1.1 mrg #define R_AMDGPU_GOTPCREL 7 /* G + GOT + A - P */ 118 1.1 mrg #define R_AMDGPU_GOTPCREL32_LO 8 /* (G + GOT + A - P) & 0xFFFFFFFF */ 119 1.1 mrg #define R_AMDGPU_GOTPCREL32_HI 9 /* (G + GOT + A - P) >> 32 */ 120 1.1 mrg #define R_AMDGPU_REL32_LO 10 /* (S + A - P) & 0xFFFFFFFF */ 121 1.1 mrg #define R_AMDGPU_REL32_HI 11 /* (S + A - P) >> 32 */ 122 1.1 mrg #define R_AMDGPU_RELATIVE64 13 /* B + A */ 123 1.1 mrg #endif 124 1.1 mrg 125 1.1 mrg const char tool_name[] = "gcn mkoffload"; 126 1.1 mrg 127 1.1 mrg static const char *gcn_dumpbase; 128 1.1 mrg static struct obstack files_to_cleanup; 129 1.1 mrg 130 1.1 mrg enum offload_abi offload_abi = OFFLOAD_ABI_UNSET; 131 1.1 mrg uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU architecture. 132 1.1 mrg uint32_t elf_flags = 133 1.1 mrg #ifdef HAVE_GCN_ASM_V3_SYNTAX 134 1.1 mrg 0; 135 1.1 mrg #endif 136 1.1 mrg #ifdef HAVE_GCN_ASM_V4_SYNTAX 137 1.1 mrg (EF_AMDGPU_FEATURE_XNACK_ANY_V4 | EF_AMDGPU_FEATURE_SRAMECC_ANY_V4); 138 1.1 mrg #endif 139 1.1 mrg 140 1.1 mrg /* Delete tempfiles. */ 141 1.1 mrg 142 1.1 mrg void 143 1.1 mrg tool_cleanup (bool from_signal ATTRIBUTE_UNUSED) 144 1.1 mrg { 145 1.1 mrg obstack_ptr_grow (&files_to_cleanup, NULL); 146 1.1 mrg const char **files = XOBFINISH (&files_to_cleanup, const char **); 147 1.1 mrg for (int i = 0; files[i]; i++) 148 1.1 mrg maybe_unlink (files[i]); 149 1.1 mrg } 150 1.1 mrg 151 1.1 mrg static void 152 1.1 mrg mkoffload_cleanup (void) 153 1.1 mrg { 154 1.1 mrg tool_cleanup (false); 155 1.1 mrg } 156 1.1 mrg 157 1.1 mrg /* Unlink FILE unless requested otherwise. */ 158 1.1 mrg 159 1.1 mrg void 160 1.1 mrg maybe_unlink (const char *file) 161 1.1 mrg { 162 1.1 mrg if (!save_temps) 163 1.1 mrg { 164 1.1 mrg if (unlink_if_ordinary (file) && errno != ENOENT) 165 1.1 mrg fatal_error (input_location, "deleting file %s: %m", file); 166 1.1 mrg } 167 1.1 mrg else if (verbose) 168 1.1 mrg fprintf (stderr, "[Leaving %s]\n", file); 169 1.1 mrg } 170 1.1 mrg 171 1.1 mrg /* Add or change the value of an environment variable, outputting the 172 1.1 mrg change to standard error if in verbose mode. */ 173 1.1 mrg 174 1.1 mrg static void 175 1.1 mrg xputenv (const char *string) 176 1.1 mrg { 177 1.1 mrg if (verbose) 178 1.1 mrg fprintf (stderr, "%s\n", string); 179 1.1 mrg putenv (CONST_CAST (char *, string)); 180 1.1 mrg } 181 1.1 mrg 182 1.1 mrg /* Read the whole input file. It will be NUL terminated (but 183 1.1 mrg remember, there could be a NUL in the file itself. */ 184 1.1 mrg 185 1.1 mrg static const char * 186 1.1 mrg read_file (FILE *stream, size_t *plen) 187 1.1 mrg { 188 1.1 mrg size_t alloc = 16384; 189 1.1 mrg size_t base = 0; 190 1.1 mrg char *buffer; 191 1.1 mrg 192 1.1 mrg if (!fseek (stream, 0, SEEK_END)) 193 1.1 mrg { 194 1.1 mrg /* Get the file size. */ 195 1.1 mrg long s = ftell (stream); 196 1.1 mrg if (s >= 0) 197 1.1 mrg alloc = s + 100; 198 1.1 mrg fseek (stream, 0, SEEK_SET); 199 1.1 mrg } 200 1.1 mrg buffer = XNEWVEC (char, alloc); 201 1.1 mrg 202 1.1 mrg for (;;) 203 1.1 mrg { 204 1.1 mrg size_t n = fread (buffer + base, 1, alloc - base - 1, stream); 205 1.1 mrg 206 1.1 mrg if (!n) 207 1.1 mrg break; 208 1.1 mrg base += n; 209 1.1 mrg if (base + 1 == alloc) 210 1.1 mrg { 211 1.1 mrg alloc *= 2; 212 1.1 mrg buffer = XRESIZEVEC (char, buffer, alloc); 213 1.1 mrg } 214 1.1 mrg } 215 1.1 mrg buffer[base] = 0; 216 1.1 mrg *plen = base; 217 1.1 mrg return buffer; 218 1.1 mrg } 219 1.1 mrg 220 1.1 mrg /* Parse STR, saving found tokens into PVALUES and return their number. 221 1.1 mrg Tokens are assumed to be delimited by ':'. */ 222 1.1 mrg 223 1.1 mrg static unsigned 224 1.1 mrg parse_env_var (const char *str, char ***pvalues) 225 1.1 mrg { 226 1.1 mrg const char *curval, *nextval; 227 1.1 mrg char **values; 228 1.1 mrg unsigned num = 1, i; 229 1.1 mrg 230 1.1 mrg curval = strchr (str, ':'); 231 1.1 mrg while (curval) 232 1.1 mrg { 233 1.1 mrg num++; 234 1.1 mrg curval = strchr (curval + 1, ':'); 235 1.1 mrg } 236 1.1 mrg 237 1.1 mrg values = (char **) xmalloc (num * sizeof (char *)); 238 1.1 mrg curval = str; 239 1.1 mrg nextval = strchr (curval, ':'); 240 1.1 mrg if (nextval == NULL) 241 1.1 mrg nextval = strchr (curval, '\0'); 242 1.1 mrg 243 1.1 mrg for (i = 0; i < num; i++) 244 1.1 mrg { 245 1.1 mrg int l = nextval - curval; 246 1.1 mrg values[i] = (char *) xmalloc (l + 1); 247 1.1 mrg memcpy (values[i], curval, l); 248 1.1 mrg values[i][l] = 0; 249 1.1 mrg curval = nextval + 1; 250 1.1 mrg nextval = strchr (curval, ':'); 251 1.1 mrg if (nextval == NULL) 252 1.1 mrg nextval = strchr (curval, '\0'); 253 1.1 mrg } 254 1.1 mrg *pvalues = values; 255 1.1 mrg return num; 256 1.1 mrg } 257 1.1 mrg 258 1.1 mrg /* Auxiliary function that frees elements of PTR and PTR itself. 259 1.1 mrg N is number of elements to be freed. If PTR is NULL, nothing is freed. 260 1.1 mrg If an element is NULL, subsequent elements are not freed. */ 261 1.1 mrg 262 1.1 mrg static void 263 1.1 mrg free_array_of_ptrs (void **ptr, unsigned n) 264 1.1 mrg { 265 1.1 mrg unsigned i; 266 1.1 mrg if (!ptr) 267 1.1 mrg return; 268 1.1 mrg for (i = 0; i < n; i++) 269 1.1 mrg { 270 1.1 mrg if (!ptr[i]) 271 1.1 mrg break; 272 1.1 mrg free (ptr[i]); 273 1.1 mrg } 274 1.1 mrg free (ptr); 275 1.1 mrg return; 276 1.1 mrg } 277 1.1 mrg 278 1.1 mrg /* Check whether NAME can be accessed in MODE. This is like access, 279 1.1 mrg except that it never considers directories to be executable. */ 280 1.1 mrg 281 1.1 mrg static int 282 1.1 mrg access_check (const char *name, int mode) 283 1.1 mrg { 284 1.1 mrg if (mode == X_OK) 285 1.1 mrg { 286 1.1 mrg struct stat st; 287 1.1 mrg 288 1.1 mrg if (stat (name, &st) < 0 || S_ISDIR (st.st_mode)) 289 1.1 mrg return -1; 290 1.1 mrg } 291 1.1 mrg 292 1.1 mrg return access (name, mode); 293 1.1 mrg } 294 1.1 mrg 295 1.1 mrg /* Copy the early-debug-info from the incoming LTO object to a new object 296 1.1 mrg that will be linked into the output HSACO file. The host relocations 297 1.1 mrg must be translated into GCN relocations, and any global undefined symbols 298 1.1 mrg must be weakened (so as not to have the debug info try to pull in host 299 1.1 mrg junk). 300 1.1 mrg 301 1.1 mrg Returns true if the file was created, false otherwise. */ 302 1.1 mrg 303 1.1 mrg static bool 304 1.1 mrg copy_early_debug_info (const char *infile, const char *outfile) 305 1.1 mrg { 306 1.1 mrg const char *errmsg; 307 1.1 mrg int err; 308 1.1 mrg 309 1.1 mrg /* The simple_object code can handle extracting the debug sections. 310 1.1 mrg This code is based on that in lto-wrapper.cc. */ 311 1.1 mrg int infd = open (infile, O_RDONLY | O_BINARY); 312 1.1 mrg if (infd == -1) 313 1.1 mrg return false; 314 1.1 mrg simple_object_read *inobj = simple_object_start_read (infd, 0, 315 1.1 mrg "__GNU_LTO", 316 1.1 mrg &errmsg, &err); 317 1.1 mrg if (!inobj) 318 1.1 mrg return false; 319 1.1 mrg 320 1.1 mrg off_t off, len; 321 1.1 mrg if (simple_object_find_section (inobj, ".gnu.debuglto_.debug_info", 322 1.1 mrg &off, &len, &errmsg, &err) != 1) 323 1.1 mrg { 324 1.1 mrg simple_object_release_read (inobj); 325 1.1 mrg close (infd); 326 1.1 mrg return false; 327 1.1 mrg } 328 1.1 mrg 329 1.1 mrg errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err, true); 330 1.1 mrg if (errmsg) 331 1.1 mrg { 332 1.1 mrg unlink_if_ordinary (outfile); 333 1.1 mrg return false; 334 1.1 mrg } 335 1.1 mrg 336 1.1 mrg simple_object_release_read (inobj); 337 1.1 mrg close (infd); 338 1.1 mrg 339 1.1 mrg /* Open the file we just created for some adjustments. 340 1.1 mrg The simple_object code can't do this, so we do it manually. */ 341 1.1 mrg FILE *outfd = fopen (outfile, "r+b"); 342 1.1 mrg if (!outfd) 343 1.1 mrg return false; 344 1.1 mrg 345 1.1 mrg Elf64_Ehdr ehdr; 346 1.1 mrg if (fread (&ehdr, sizeof (ehdr), 1, outfd) != 1) 347 1.1 mrg { 348 1.1 mrg fclose (outfd); 349 1.1 mrg return true; 350 1.1 mrg } 351 1.1 mrg 352 1.1 mrg /* We only support host relocations of x86_64, for now. */ 353 1.1 mrg gcc_assert (ehdr.e_machine == EM_X86_64); 354 1.1 mrg 355 1.1 mrg /* Fiji devices use HSACOv3 regardless of the assembler. */ 356 1.1 mrg uint32_t elf_flags_actual = (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX803 357 1.1 mrg ? 0 : elf_flags); 358 1.1 mrg /* GFX900 devices don't support the sramecc attribute even if 359 1.1 mrg a buggy assembler thinks it does. This must match gcn-hsa.h */ 360 1.1 mrg if (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX900) 361 1.1 mrg SET_SRAM_ECC_UNSUPPORTED (elf_flags_actual); 362 1.1 mrg 363 1.1 mrg /* Patch the correct elf architecture flag into the file. */ 364 1.1 mrg ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA; 365 1.1 mrg #ifdef HAVE_GCN_ASM_V3_SYNTAX 366 1.1 mrg ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA_V3; 367 1.1 mrg #endif 368 1.1 mrg #ifdef HAVE_GCN_ASM_V4_SYNTAX 369 1.1 mrg ehdr.e_ident[8] = (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX803 370 1.1 mrg ? ELFABIVERSION_AMDGPU_HSA_V3 371 1.1 mrg : ELFABIVERSION_AMDGPU_HSA_V4); 372 1.1 mrg #endif 373 1.1 mrg ehdr.e_type = ET_REL; 374 1.1 mrg ehdr.e_machine = EM_AMDGPU; 375 1.1 mrg ehdr.e_flags = elf_arch | elf_flags_actual; 376 1.1 mrg 377 1.1 mrg /* Load the section headers so we can walk them later. */ 378 1.1 mrg Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr) 379 1.1 mrg * ehdr.e_shnum); 380 1.1 mrg if (fseek (outfd, ehdr.e_shoff, SEEK_SET) == -1 381 1.1 mrg || fread (sections, sizeof (Elf64_Shdr), ehdr.e_shnum, 382 1.1 mrg outfd) != ehdr.e_shnum) 383 1.1 mrg { 384 1.1 mrg free (sections); 385 1.1 mrg fclose (outfd); 386 1.1 mrg return true; 387 1.1 mrg } 388 1.1 mrg 389 1.1 mrg /* Convert the host relocations to target relocations. */ 390 1.1 mrg for (int i = 0; i < ehdr.e_shnum; i++) 391 1.1 mrg { 392 1.1 mrg if (sections[i].sh_type != SHT_RELA) 393 1.1 mrg continue; 394 1.1 mrg 395 1.1 mrg char *data = (char *)xmalloc (sections[i].sh_size); 396 1.1 mrg if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1 397 1.1 mrg || fread (data, sections[i].sh_size, 1, outfd) != 1) 398 1.1 mrg { 399 1.1 mrg free (data); 400 1.1 mrg continue; 401 1.1 mrg } 402 1.1 mrg 403 1.1 mrg for (size_t offset = 0; 404 1.1 mrg offset < sections[i].sh_size; 405 1.1 mrg offset += sections[i].sh_entsize) 406 1.1 mrg { 407 1.1 mrg Elf64_Rela *reloc = (Elf64_Rela *) (data + offset); 408 1.1 mrg 409 1.1 mrg /* Map the host relocations to GCN relocations. 410 1.1 mrg Only relocations that can appear in DWARF need be handled. */ 411 1.1 mrg switch (ELF64_R_TYPE (reloc->r_info)) 412 1.1 mrg { 413 1.1 mrg case R_X86_64_32: 414 1.1 mrg case R_X86_64_32S: 415 1.1 mrg reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info), 416 1.1 mrg R_AMDGPU_ABS32); 417 1.1 mrg break; 418 1.1 mrg case R_X86_64_PC32: 419 1.1 mrg reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info), 420 1.1 mrg R_AMDGPU_REL32); 421 1.1 mrg break; 422 1.1 mrg case R_X86_64_PC64: 423 1.1 mrg reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info), 424 1.1 mrg R_AMDGPU_REL64); 425 1.1 mrg break; 426 1.1 mrg case R_X86_64_64: 427 1.1 mrg reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info), 428 1.1 mrg R_AMDGPU_ABS64); 429 1.1 mrg break; 430 1.1 mrg case R_X86_64_RELATIVE: 431 1.1 mrg reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info), 432 1.1 mrg R_AMDGPU_RELATIVE64); 433 1.1 mrg break; 434 1.1 mrg default: 435 1.1 mrg gcc_unreachable (); 436 1.1 mrg } 437 1.1 mrg } 438 1.1 mrg 439 1.1 mrg /* Write back our relocation changes. */ 440 1.1 mrg if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1) 441 1.1 mrg fwrite (data, sections[i].sh_size, 1, outfd); 442 1.1 mrg 443 1.1 mrg free (data); 444 1.1 mrg } 445 1.1 mrg 446 1.1 mrg /* Weaken any global undefined symbols that would pull in unwanted 447 1.1 mrg objects. */ 448 1.1 mrg for (int i = 0; i < ehdr.e_shnum; i++) 449 1.1 mrg { 450 1.1 mrg if (sections[i].sh_type != SHT_SYMTAB) 451 1.1 mrg continue; 452 1.1 mrg 453 1.1 mrg char *data = (char *)xmalloc (sections[i].sh_size); 454 1.1 mrg if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1 455 1.1 mrg || fread (data, sections[i].sh_size, 1, outfd) != 1) 456 1.1 mrg { 457 1.1 mrg free (data); 458 1.1 mrg continue; 459 1.1 mrg } 460 1.1 mrg 461 1.1 mrg for (size_t offset = 0; 462 1.1 mrg offset < sections[i].sh_size; 463 1.1 mrg offset += sections[i].sh_entsize) 464 1.1 mrg { 465 1.1 mrg Elf64_Sym *sym = (Elf64_Sym *) (data + offset); 466 1.1 mrg int type = ELF64_ST_TYPE (sym->st_info); 467 1.1 mrg int bind = ELF64_ST_BIND (sym->st_info); 468 1.1 mrg 469 1.1 mrg if (bind == STB_GLOBAL && sym->st_shndx == 0) 470 1.1 mrg sym->st_info = ELF64_ST_INFO (STB_WEAK, type); 471 1.1 mrg } 472 1.1 mrg 473 1.1 mrg /* Write back our symbol changes. */ 474 1.1 mrg if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1) 475 1.1 mrg fwrite (data, sections[i].sh_size, 1, outfd); 476 1.1 mrg 477 1.1 mrg free (data); 478 1.1 mrg } 479 1.1 mrg free (sections); 480 1.1 mrg 481 1.1 mrg /* Write back our header changes. */ 482 1.1 mrg rewind (outfd); 483 1.1 mrg fwrite (&ehdr, sizeof (ehdr), 1, outfd); 484 1.1 mrg 485 1.1 mrg fclose (outfd); 486 1.1 mrg return true; 487 1.1 mrg } 488 1.1 mrg 489 1.1 mrg /* Parse an input assembler file, extract the offload tables etc., 490 1.1 mrg and output (1) the assembler code, minus the tables (which can contain 491 1.1 mrg problematic relocations), and (2) a C file with the offload tables 492 1.1 mrg encoded as structured data. */ 493 1.1 mrg 494 1.1 mrg static void 495 1.1 mrg process_asm (FILE *in, FILE *out, FILE *cfile) 496 1.1 mrg { 497 1.1 mrg int fn_count = 0, var_count = 0, dims_count = 0, regcount_count = 0; 498 1.1 mrg struct obstack fns_os, dims_os, regcounts_os; 499 1.1 mrg obstack_init (&fns_os); 500 1.1 mrg obstack_init (&dims_os); 501 1.1 mrg obstack_init (®counts_os); 502 1.1 mrg 503 1.1 mrg struct oaccdims 504 1.1 mrg { 505 1.1 mrg int d[3]; 506 1.1 mrg char *name; 507 1.1 mrg } dim; 508 1.1 mrg 509 1.1 mrg struct regcount 510 1.1 mrg { 511 1.1 mrg int sgpr_count; 512 1.1 mrg int vgpr_count; 513 1.1 mrg char *kernel_name; 514 1.1 mrg } regcount = { -1, -1, NULL }; 515 1.1 mrg 516 1.1 mrg /* Always add _init_array and _fini_array as kernels. */ 517 1.1 mrg obstack_ptr_grow (&fns_os, xstrdup ("_init_array")); 518 1.1 mrg obstack_ptr_grow (&fns_os, xstrdup ("_fini_array")); 519 1.1 mrg fn_count += 2; 520 1.1 mrg 521 1.1 mrg char buf[1000]; 522 1.1 mrg enum 523 1.1 mrg { IN_CODE, 524 1.1 mrg IN_METADATA, 525 1.1 mrg IN_VARS, 526 1.1 mrg IN_FUNCS 527 1.1 mrg } state = IN_CODE; 528 1.1 mrg while (fgets (buf, sizeof (buf), in)) 529 1.1 mrg { 530 1.1 mrg switch (state) 531 1.1 mrg { 532 1.1 mrg case IN_CODE: 533 1.1 mrg { 534 1.1 mrg if (sscanf (buf, " ;; OPENACC-DIMS: %d, %d, %d : %ms\n", 535 1.1 mrg &dim.d[0], &dim.d[1], &dim.d[2], &dim.name) == 4) 536 1.1 mrg { 537 1.1 mrg obstack_grow (&dims_os, &dim, sizeof (dim)); 538 1.1 mrg dims_count++; 539 1.1 mrg } 540 1.1 mrg 541 1.1 mrg break; 542 1.1 mrg } 543 1.1 mrg case IN_METADATA: 544 1.1 mrg { 545 1.1 mrg if (sscanf (buf, " - .name: %ms\n", ®count.kernel_name) == 1) 546 1.1 mrg break; 547 1.1 mrg else if (sscanf (buf, " .sgpr_count: %d\n", 548 1.1 mrg ®count.sgpr_count) == 1) 549 1.1 mrg { 550 1.1 mrg gcc_assert (regcount.kernel_name); 551 1.1 mrg break; 552 1.1 mrg } 553 1.1 mrg else if (sscanf (buf, " .vgpr_count: %d\n", 554 1.1 mrg ®count.vgpr_count) == 1) 555 1.1 mrg { 556 1.1 mrg gcc_assert (regcount.kernel_name); 557 1.1 mrg break; 558 1.1 mrg } 559 1.1 mrg 560 1.1 mrg break; 561 1.1 mrg } 562 1.1 mrg case IN_VARS: 563 1.1 mrg { 564 1.1 mrg char *varname; 565 1.1 mrg unsigned varsize; 566 1.1 mrg if (sscanf (buf, " .8byte %ms\n", &varname)) 567 1.1 mrg { 568 1.1 mrg fputs (buf, out); 569 1.1 mrg fgets (buf, sizeof (buf), in); 570 1.1 mrg if (!sscanf (buf, " .8byte %u\n", &varsize)) 571 1.1 mrg abort (); 572 1.1 mrg var_count++; 573 1.1 mrg } 574 1.1 mrg break; 575 1.1 mrg } 576 1.1 mrg case IN_FUNCS: 577 1.1 mrg { 578 1.1 mrg char *funcname; 579 1.1 mrg if (sscanf (buf, "\t.8byte\t%ms\n", &funcname)) 580 1.1 mrg { 581 1.1 mrg obstack_ptr_grow (&fns_os, funcname); 582 1.1 mrg fn_count++; 583 1.1 mrg continue; 584 1.1 mrg } 585 1.1 mrg break; 586 1.1 mrg } 587 1.1 mrg } 588 1.1 mrg 589 1.1 mrg char dummy; 590 1.1 mrg if (sscanf (buf, " .section .gnu.offload_vars%c", &dummy) > 0) 591 1.1 mrg { 592 1.1 mrg state = IN_VARS; 593 1.1 mrg 594 1.1 mrg /* Add a global symbol to allow plugin-gcn.c to locate the table 595 1.1 mrg at runtime. It can't use the "offload_var_table.N" emitted by 596 1.1 mrg the compiler because a) they're not global, and b) there's one 597 1.1 mrg for each input file combined into the binary. */ 598 1.1 mrg fputs (buf, out); 599 1.1 mrg fputs ("\t.global .offload_var_table\n" 600 1.1 mrg "\t.type .offload_var_table, @object\n" 601 1.1 mrg ".offload_var_table:\n", 602 1.1 mrg out); 603 1.1 mrg } 604 1.1 mrg else if (sscanf (buf, " .section .gnu.offload_funcs%c", &dummy) > 0) 605 1.1 mrg state = IN_FUNCS; 606 1.1 mrg else if (sscanf (buf, " .amdgpu_metadata%c", &dummy) > 0) 607 1.1 mrg { 608 1.1 mrg state = IN_METADATA; 609 1.1 mrg regcount.kernel_name = NULL; 610 1.1 mrg regcount.sgpr_count = regcount.vgpr_count = -1; 611 1.1 mrg } 612 1.1 mrg else if (sscanf (buf, " .section %c", &dummy) > 0 613 1.1 mrg || sscanf (buf, " .text%c", &dummy) > 0 614 1.1 mrg || sscanf (buf, " .bss%c", &dummy) > 0 615 1.1 mrg || sscanf (buf, " .data%c", &dummy) > 0 616 1.1 mrg || sscanf (buf, " .ident %c", &dummy) > 0) 617 1.1 mrg state = IN_CODE; 618 1.1 mrg else if (sscanf (buf, " .end_amdgpu_metadata%c", &dummy) > 0) 619 1.1 mrg { 620 1.1 mrg state = IN_CODE; 621 1.1 mrg gcc_assert (regcount.kernel_name != NULL 622 1.1 mrg && regcount.sgpr_count >= 0 623 1.1 mrg && regcount.vgpr_count >= 0); 624 1.1 mrg obstack_grow (®counts_os, ®count, sizeof (regcount)); 625 1.1 mrg regcount_count++; 626 1.1 mrg regcount.kernel_name = NULL; 627 1.1 mrg regcount.sgpr_count = regcount.vgpr_count = -1; 628 1.1 mrg } 629 1.1 mrg 630 1.1 mrg if (state == IN_CODE || state == IN_METADATA || state == IN_VARS) 631 1.1 mrg fputs (buf, out); 632 1.1 mrg } 633 1.1 mrg 634 1.1 mrg char **fns = XOBFINISH (&fns_os, char **); 635 1.1 mrg struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *); 636 1.1 mrg struct regcount *regcounts = XOBFINISH (®counts_os, struct regcount *); 637 1.1 mrg 638 1.1 mrg fprintf (cfile, "#include <stdlib.h>\n"); 639 1.1 mrg fprintf (cfile, "#include <stdbool.h>\n\n"); 640 1.1 mrg 641 1.1 mrg fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count); 642 1.1 mrg 643 1.1 mrg /* Dump out function idents. */ 644 1.1 mrg fprintf (cfile, "static const struct hsa_kernel_description {\n" 645 1.1 mrg " const char *name;\n" 646 1.1 mrg " int oacc_dims[3];\n" 647 1.1 mrg " int sgpr_count;\n" 648 1.1 mrg " int vgpr_count;\n" 649 1.1 mrg "} gcn_kernels[] = {\n "); 650 1.1 mrg dim.d[0] = dim.d[1] = dim.d[2] = 0; 651 1.1 mrg const char *comma; 652 1.1 mrg int i; 653 1.1 mrg for (comma = "", i = 0; i < fn_count; comma = ",\n ", i++) 654 1.1 mrg { 655 1.1 mrg /* Find if we recorded dimensions for this function. */ 656 1.1 mrg int *d = dim.d; /* Previously zeroed. */ 657 1.1 mrg int sgpr_count = 0; 658 1.1 mrg int vgpr_count = 0; 659 1.1 mrg for (int j = 0; j < dims_count; j++) 660 1.1 mrg if (strcmp (fns[i], dims[j].name) == 0) 661 1.1 mrg { 662 1.1 mrg d = dims[j].d; 663 1.1 mrg break; 664 1.1 mrg } 665 1.1 mrg for (int j = 0; j < regcount_count; j++) 666 1.1 mrg if (strcmp (fns[i], regcounts[j].kernel_name) == 0) 667 1.1 mrg { 668 1.1 mrg sgpr_count = regcounts[j].sgpr_count; 669 1.1 mrg vgpr_count = regcounts[j].vgpr_count; 670 1.1 mrg break; 671 1.1 mrg } 672 1.1 mrg 673 1.1 mrg fprintf (cfile, "%s{\"%s\", {%d, %d, %d}, %d, %d}", comma, 674 1.1 mrg fns[i], d[0], d[1], d[2], sgpr_count, vgpr_count); 675 1.1 mrg 676 1.1 mrg free (fns[i]); 677 1.1 mrg } 678 1.1 mrg fprintf (cfile, "\n};\n\n"); 679 1.1 mrg 680 1.1 mrg obstack_free (&fns_os, NULL); 681 1.1 mrg for (i = 0; i < dims_count; i++) 682 1.1 mrg free (dims[i].name); 683 1.1 mrg for (i = 0; i < regcount_count; i++) 684 1.1 mrg free (regcounts[i].kernel_name); 685 1.1 mrg obstack_free (&dims_os, NULL); 686 1.1 mrg obstack_free (®counts_os, NULL); 687 1.1 mrg } 688 1.1 mrg 689 1.1 mrg /* Embed an object file into a C source file. */ 690 1.1 mrg 691 1.1 mrg static void 692 1.1 mrg process_obj (FILE *in, FILE *cfile) 693 1.1 mrg { 694 1.1 mrg size_t len = 0; 695 1.1 mrg const char *input = read_file (in, &len); 696 1.1 mrg 697 1.1 mrg /* Dump out an array containing the binary. 698 1.1 mrg FIXME: do this with objcopy. */ 699 1.1 mrg fprintf (cfile, "static unsigned char gcn_code[] = {"); 700 1.1 mrg for (size_t i = 0; i < len; i += 17) 701 1.1 mrg { 702 1.1 mrg fprintf (cfile, "\n\t"); 703 1.1 mrg for (size_t j = i; j < i + 17 && j < len; j++) 704 1.1 mrg fprintf (cfile, "%3u,", (unsigned char) input[j]); 705 1.1 mrg } 706 1.1 mrg fprintf (cfile, "\n};\n\n"); 707 1.1 mrg 708 1.1 mrg fprintf (cfile, 709 1.1 mrg "static const struct gcn_image {\n" 710 1.1 mrg " size_t size;\n" 711 1.1 mrg " void *image;\n" 712 1.1 mrg "} gcn_image = {\n" 713 1.1 mrg " %zu,\n" 714 1.1 mrg " gcn_code\n" 715 1.1 mrg "};\n\n", 716 1.1 mrg len); 717 1.1 mrg 718 1.1 mrg fprintf (cfile, 719 1.1 mrg "static const struct gcn_image_desc {\n" 720 1.1 mrg " const struct gcn_image *gcn_image;\n" 721 1.1 mrg " unsigned kernel_count;\n" 722 1.1 mrg " const struct hsa_kernel_description *kernel_infos;\n" 723 1.1 mrg " unsigned global_variable_count;\n" 724 1.1 mrg "} target_data = {\n" 725 1.1 mrg " &gcn_image,\n" 726 1.1 mrg " sizeof (gcn_kernels) / sizeof (gcn_kernels[0]),\n" 727 1.1 mrg " gcn_kernels,\n" 728 1.1 mrg " gcn_num_vars\n" 729 1.1 mrg "};\n\n"); 730 1.1 mrg 731 1.1 mrg fprintf (cfile, 732 1.1 mrg "#ifdef __cplusplus\n" 733 1.1 mrg "extern \"C\" {\n" 734 1.1 mrg "#endif\n" 735 1.1 mrg "extern void GOMP_offload_register_ver" 736 1.1 mrg " (unsigned, const void *, int, const void *);\n" 737 1.1 mrg "extern void GOMP_offload_unregister_ver" 738 1.1 mrg " (unsigned, const void *, int, const void *);\n" 739 1.1 mrg "#ifdef __cplusplus\n" 740 1.1 mrg "}\n" 741 1.1 mrg "#endif\n\n"); 742 1.1 mrg 743 1.1 mrg fprintf (cfile, "extern const void *const __OFFLOAD_TABLE__[];\n\n"); 744 1.1 mrg 745 1.1 mrg fprintf (cfile, "static __attribute__((constructor)) void init (void)\n" 746 1.1 mrg "{\n" 747 1.1 mrg " GOMP_offload_register_ver (%#x, __OFFLOAD_TABLE__," 748 1.1 mrg " %d/*GCN*/, &target_data);\n" 749 1.1 mrg "};\n", 750 1.1 mrg GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_GCN), 751 1.1 mrg GOMP_DEVICE_GCN); 752 1.1 mrg 753 1.1 mrg fprintf (cfile, "static __attribute__((destructor)) void fini (void)\n" 754 1.1 mrg "{\n" 755 1.1 mrg " GOMP_offload_unregister_ver (%#x, __OFFLOAD_TABLE__," 756 1.1 mrg " %d/*GCN*/, &target_data);\n" 757 1.1 mrg "};\n", 758 1.1 mrg GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_GCN), 759 1.1 mrg GOMP_DEVICE_GCN); 760 1.1 mrg } 761 1.1 mrg 762 1.1 mrg /* Compile a C file using the host compiler. */ 763 1.1 mrg 764 1.1 mrg static void 765 1.1 mrg compile_native (const char *infile, const char *outfile, const char *compiler, 766 1.1 mrg bool fPIC, bool fpic) 767 1.1 mrg { 768 1.1 mrg const char *collect_gcc_options = getenv ("COLLECT_GCC_OPTIONS"); 769 1.1 mrg if (!collect_gcc_options) 770 1.1 mrg fatal_error (input_location, 771 1.1 mrg "environment variable COLLECT_GCC_OPTIONS must be set"); 772 1.1 mrg 773 1.1 mrg struct obstack argv_obstack; 774 1.1 mrg obstack_init (&argv_obstack); 775 1.1 mrg obstack_ptr_grow (&argv_obstack, compiler); 776 1.1 mrg if (fPIC) 777 1.1 mrg obstack_ptr_grow (&argv_obstack, "-fPIC"); 778 1.1 mrg if (fpic) 779 1.1 mrg obstack_ptr_grow (&argv_obstack, "-fpic"); 780 1.1 mrg if (save_temps) 781 1.1 mrg obstack_ptr_grow (&argv_obstack, "-save-temps"); 782 1.1 mrg if (verbose) 783 1.1 mrg obstack_ptr_grow (&argv_obstack, "-v"); 784 1.1 mrg obstack_ptr_grow (&argv_obstack, "-dumpdir"); 785 1.1 mrg obstack_ptr_grow (&argv_obstack, ""); 786 1.1 mrg obstack_ptr_grow (&argv_obstack, "-dumpbase"); 787 1.1 mrg obstack_ptr_grow (&argv_obstack, gcn_dumpbase); 788 1.1 mrg obstack_ptr_grow (&argv_obstack, "-dumpbase-ext"); 789 1.1 mrg obstack_ptr_grow (&argv_obstack, ".c"); 790 1.1 mrg switch (offload_abi) 791 1.1 mrg { 792 1.1 mrg case OFFLOAD_ABI_LP64: 793 1.1 mrg obstack_ptr_grow (&argv_obstack, "-m64"); 794 1.1 mrg break; 795 1.1 mrg case OFFLOAD_ABI_ILP32: 796 1.1 mrg obstack_ptr_grow (&argv_obstack, "-m32"); 797 1.1 mrg break; 798 1.1 mrg default: 799 1.1 mrg gcc_unreachable (); 800 1.1 mrg } 801 1.1 mrg obstack_ptr_grow (&argv_obstack, infile); 802 1.1 mrg obstack_ptr_grow (&argv_obstack, "-c"); 803 1.1 mrg obstack_ptr_grow (&argv_obstack, "-o"); 804 1.1 mrg obstack_ptr_grow (&argv_obstack, outfile); 805 1.1 mrg obstack_ptr_grow (&argv_obstack, NULL); 806 1.1 mrg 807 1.1 mrg const char **new_argv = XOBFINISH (&argv_obstack, const char **); 808 1.1 mrg fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true, 809 1.1 mrg ".gccnative_args"); 810 1.1 mrg obstack_free (&argv_obstack, NULL); 811 1.1 mrg } 812 1.1 mrg 813 1.1 mrg int 814 1.1 mrg main (int argc, char **argv) 815 1.1 mrg { 816 1.1 mrg FILE *in = stdin; 817 1.1 mrg FILE *out = stdout; 818 1.1 mrg FILE *cfile = stdout; 819 1.1 mrg const char *outname = 0; 820 1.1 mrg 821 1.1 mrg progname = "mkoffload"; 822 1.1 mrg diagnostic_initialize (global_dc, 0); 823 1.1 mrg 824 1.1 mrg obstack_init (&files_to_cleanup); 825 1.1 mrg if (atexit (mkoffload_cleanup) != 0) 826 1.1 mrg fatal_error (input_location, "atexit failed"); 827 1.1 mrg 828 1.1 mrg char *collect_gcc = getenv ("COLLECT_GCC"); 829 1.1 mrg if (collect_gcc == NULL) 830 1.1 mrg fatal_error (input_location, "COLLECT_GCC must be set."); 831 1.1 mrg const char *gcc_path = dirname (ASTRDUP (collect_gcc)); 832 1.1 mrg const char *gcc_exec = basename (ASTRDUP (collect_gcc)); 833 1.1 mrg 834 1.1 mrg size_t len = (strlen (gcc_path) + 1 + strlen (GCC_INSTALL_NAME) + 1); 835 1.1 mrg char *driver = XALLOCAVEC (char, len); 836 1.1 mrg 837 1.1 mrg if (strcmp (gcc_exec, collect_gcc) == 0) 838 1.1 mrg /* collect_gcc has no path, so it was found in PATH. Make sure we also 839 1.1 mrg find accel-gcc in PATH. */ 840 1.1 mrg gcc_path = NULL; 841 1.1 mrg 842 1.1 mrg int driver_used = 0; 843 1.1 mrg if (gcc_path != NULL) 844 1.1 mrg driver_used = sprintf (driver, "%s/", gcc_path); 845 1.1 mrg sprintf (driver + driver_used, "%s", GCC_INSTALL_NAME); 846 1.1 mrg 847 1.1 mrg bool found = false; 848 1.1 mrg if (gcc_path == NULL) 849 1.1 mrg found = true; 850 1.1 mrg else if (access_check (driver, X_OK) == 0) 851 1.1 mrg found = true; 852 1.1 mrg else 853 1.1 mrg { 854 1.1 mrg /* Don't use alloca pointer with XRESIZEVEC. */ 855 1.1 mrg driver = NULL; 856 1.1 mrg /* Look in all COMPILER_PATHs for GCC_INSTALL_NAME. */ 857 1.1 mrg char **paths = NULL; 858 1.1 mrg unsigned n_paths; 859 1.1 mrg n_paths = parse_env_var (getenv ("COMPILER_PATH"), &paths); 860 1.1 mrg for (unsigned i = 0; i < n_paths; i++) 861 1.1 mrg { 862 1.1 mrg len = strlen (paths[i]) + 1 + strlen (GCC_INSTALL_NAME) + 1; 863 1.1 mrg driver = XRESIZEVEC (char, driver, len); 864 1.1 mrg sprintf (driver, "%s/%s", paths[i], GCC_INSTALL_NAME); 865 1.1 mrg if (access_check (driver, X_OK) == 0) 866 1.1 mrg { 867 1.1 mrg found = true; 868 1.1 mrg break; 869 1.1 mrg } 870 1.1 mrg } 871 1.1 mrg free_array_of_ptrs ((void **) paths, n_paths); 872 1.1 mrg } 873 1.1 mrg 874 1.1 mrg if (!found) 875 1.1 mrg fatal_error (input_location, 876 1.1 mrg "offload compiler %s not found", GCC_INSTALL_NAME); 877 1.1 mrg 878 1.1 mrg /* We may be called with all the arguments stored in some file and 879 1.1 mrg passed with @file. Expand them into argv before processing. */ 880 1.1 mrg expandargv (&argc, &argv); 881 1.1 mrg 882 1.1 mrg /* Scan the argument vector. */ 883 1.1 mrg bool fopenmp = false; 884 1.1 mrg bool fopenacc = false; 885 1.1 mrg bool fPIC = false; 886 1.1 mrg bool fpic = false; 887 1.1 mrg bool sram_seen = false; 888 1.1 mrg for (int i = 1; i < argc; i++) 889 1.1 mrg { 890 1.1 mrg #define STR "-foffload-abi=" 891 1.1 mrg if (startswith (argv[i], STR)) 892 1.1 mrg { 893 1.1 mrg if (strcmp (argv[i] + strlen (STR), "lp64") == 0) 894 1.1 mrg offload_abi = OFFLOAD_ABI_LP64; 895 1.1 mrg else if (strcmp (argv[i] + strlen (STR), "ilp32") == 0) 896 1.1 mrg offload_abi = OFFLOAD_ABI_ILP32; 897 1.1 mrg else 898 1.1 mrg fatal_error (input_location, 899 1.1 mrg "unrecognizable argument of option " STR); 900 1.1 mrg } 901 1.1 mrg #undef STR 902 1.1 mrg else if (strcmp (argv[i], "-fopenmp") == 0) 903 1.1 mrg fopenmp = true; 904 1.1 mrg else if (strcmp (argv[i], "-fopenacc") == 0) 905 1.1 mrg fopenacc = true; 906 1.1 mrg else if (strcmp (argv[i], "-fPIC") == 0) 907 1.1 mrg fPIC = true; 908 1.1 mrg else if (strcmp (argv[i], "-fpic") == 0) 909 1.1 mrg fpic = true; 910 1.1 mrg else if (strcmp (argv[i], "-mxnack") == 0) 911 1.1 mrg SET_XNACK_ON (elf_flags); 912 1.1 mrg else if (strcmp (argv[i], "-mno-xnack") == 0) 913 1.1 mrg SET_XNACK_OFF (elf_flags); 914 1.1 mrg else if (strcmp (argv[i], "-msram-ecc=on") == 0) 915 1.1 mrg { 916 1.1 mrg SET_SRAM_ECC_ON (elf_flags); 917 1.1 mrg sram_seen = true; 918 1.1 mrg } 919 1.1 mrg else if (strcmp (argv[i], "-msram-ecc=any") == 0) 920 1.1 mrg { 921 1.1 mrg SET_SRAM_ECC_ANY (elf_flags); 922 1.1 mrg sram_seen = true; 923 1.1 mrg } 924 1.1 mrg else if (strcmp (argv[i], "-msram-ecc=off") == 0) 925 1.1 mrg { 926 1.1 mrg SET_SRAM_ECC_OFF (elf_flags); 927 1.1 mrg sram_seen = true; 928 1.1 mrg } 929 1.1 mrg else if (strcmp (argv[i], "-save-temps") == 0) 930 1.1 mrg save_temps = true; 931 1.1 mrg else if (strcmp (argv[i], "-v") == 0) 932 1.1 mrg verbose = true; 933 1.1 mrg else if (strcmp (argv[i], "-dumpbase") == 0 934 1.1 mrg && i + 1 < argc) 935 1.1 mrg dumppfx = argv[++i]; 936 1.1 mrg else if (strcmp (argv[i], "-march=fiji") == 0) 937 1.1 mrg elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; 938 1.1 mrg else if (strcmp (argv[i], "-march=gfx900") == 0) 939 1.1 mrg elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900; 940 1.1 mrg else if (strcmp (argv[i], "-march=gfx906") == 0) 941 1.1 mrg elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906; 942 1.1 mrg else if (strcmp (argv[i], "-march=gfx908") == 0) 943 1.1 mrg elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908; 944 1.1 mrg } 945 1.1 mrg 946 1.1 mrg if (!(fopenacc ^ fopenmp)) 947 1.1 mrg fatal_error (input_location, "either -fopenacc or -fopenmp must be set"); 948 1.1 mrg 949 1.1 mrg if (!sram_seen) 950 1.1 mrg { 951 1.1 mrg #ifdef HAVE_GCN_ASM_V3_SYNTAX 952 1.1 mrg /* For HSACOv3, the SRAM-ECC feature defaults to "on" on GPUs where the 953 1.1 mrg feature is available. 954 1.1 mrg (HSACOv4 has elf_flags initialsed to "any" in all cases.) */ 955 1.1 mrg switch (elf_arch) 956 1.1 mrg { 957 1.1 mrg case EF_AMDGPU_MACH_AMDGCN_GFX803: 958 1.1 mrg case EF_AMDGPU_MACH_AMDGCN_GFX900: 959 1.1 mrg case EF_AMDGPU_MACH_AMDGCN_GFX906: 960 1.1 mrg #ifndef HAVE_GCN_SRAM_ECC_GFX908 961 1.1 mrg case EF_AMDGPU_MACH_AMDGCN_GFX908: 962 1.1 mrg #endif 963 1.1 mrg break; 964 1.1 mrg default: 965 1.1 mrg SET_SRAM_ECC_ON (elf_flags); 966 1.1 mrg break; 967 1.1 mrg } 968 1.1 mrg #endif 969 1.1 mrg } 970 1.1 mrg 971 1.1 mrg const char *abi; 972 1.1 mrg switch (offload_abi) 973 1.1 mrg { 974 1.1 mrg case OFFLOAD_ABI_LP64: 975 1.1 mrg abi = "-m64"; 976 1.1 mrg break; 977 1.1 mrg case OFFLOAD_ABI_ILP32: 978 1.1 mrg abi = "-m32"; 979 1.1 mrg break; 980 1.1 mrg default: 981 1.1 mrg gcc_unreachable (); 982 1.1 mrg } 983 1.1 mrg 984 1.1 mrg /* Build arguments for compiler pass. */ 985 1.1 mrg struct obstack cc_argv_obstack; 986 1.1 mrg obstack_init (&cc_argv_obstack); 987 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, driver); 988 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-S"); 989 1.1 mrg 990 1.1 mrg if (save_temps) 991 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-save-temps"); 992 1.1 mrg if (verbose) 993 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-v"); 994 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, abi); 995 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-xlto"); 996 1.1 mrg if (fopenmp) 997 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-mgomp"); 998 1.1 mrg 999 1.1 mrg for (int ix = 1; ix != argc; ix++) 1000 1.1 mrg { 1001 1.1 mrg if (!strcmp (argv[ix], "-o") && ix + 1 != argc) 1002 1.1 mrg outname = argv[++ix]; 1003 1.1 mrg else 1004 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, argv[ix]); 1005 1.1 mrg } 1006 1.1 mrg 1007 1.1 mrg if (!dumppfx) 1008 1.1 mrg dumppfx = outname; 1009 1.1 mrg 1010 1.1 mrg gcn_dumpbase = concat (dumppfx, ".c", NULL); 1011 1.1 mrg 1012 1.1 mrg const char *gcn_cfile_name; 1013 1.1 mrg if (save_temps) 1014 1.1 mrg gcn_cfile_name = gcn_dumpbase; 1015 1.1 mrg else 1016 1.1 mrg gcn_cfile_name = make_temp_file (".c"); 1017 1.1 mrg obstack_ptr_grow (&files_to_cleanup, gcn_cfile_name); 1018 1.1 mrg 1019 1.1 mrg cfile = fopen (gcn_cfile_name, "w"); 1020 1.1 mrg if (!cfile) 1021 1.1 mrg fatal_error (input_location, "cannot open '%s'", gcn_cfile_name); 1022 1.1 mrg 1023 1.1 mrg /* Currently, we only support offloading in 64-bit configurations. */ 1024 1.1 mrg if (offload_abi == OFFLOAD_ABI_LP64) 1025 1.1 mrg { 1026 1.1 mrg const char *mko_dumpbase = concat (dumppfx, ".mkoffload", NULL); 1027 1.1 mrg const char *hsaco_dumpbase = concat (dumppfx, ".mkoffload.hsaco", NULL); 1028 1.1 mrg 1029 1.1 mrg const char *gcn_s1_name; 1030 1.1 mrg const char *gcn_s2_name; 1031 1.1 mrg const char *gcn_o_name; 1032 1.1 mrg if (save_temps) 1033 1.1 mrg { 1034 1.1 mrg gcn_s1_name = concat (mko_dumpbase, ".1.s", NULL); 1035 1.1 mrg gcn_s2_name = concat (mko_dumpbase, ".2.s", NULL); 1036 1.1 mrg gcn_o_name = hsaco_dumpbase; 1037 1.1 mrg } 1038 1.1 mrg else 1039 1.1 mrg { 1040 1.1 mrg gcn_s1_name = make_temp_file (".mkoffload.1.s"); 1041 1.1 mrg gcn_s2_name = make_temp_file (".mkoffload.2.s"); 1042 1.1 mrg gcn_o_name = make_temp_file (".mkoffload.hsaco"); 1043 1.1 mrg } 1044 1.1 mrg obstack_ptr_grow (&files_to_cleanup, gcn_s1_name); 1045 1.1 mrg obstack_ptr_grow (&files_to_cleanup, gcn_s2_name); 1046 1.1 mrg obstack_ptr_grow (&files_to_cleanup, gcn_o_name); 1047 1.1 mrg 1048 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpdir"); 1049 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, ""); 1050 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpbase"); 1051 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, mko_dumpbase); 1052 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpbase-ext"); 1053 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, ""); 1054 1.1 mrg 1055 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-o"); 1056 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, gcn_s1_name); 1057 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, NULL); 1058 1.1 mrg const char **cc_argv = XOBFINISH (&cc_argv_obstack, const char **); 1059 1.1 mrg 1060 1.1 mrg /* Build arguments for assemble/link pass. */ 1061 1.1 mrg struct obstack ld_argv_obstack; 1062 1.1 mrg obstack_init (&ld_argv_obstack); 1063 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, driver); 1064 1.1 mrg 1065 1.1 mrg /* Extract early-debug information from the input objects. 1066 1.1 mrg This loop finds all the inputs that end ".o" and aren't the output. */ 1067 1.1 mrg int dbgcount = 0; 1068 1.1 mrg for (int ix = 1; ix != argc; ix++) 1069 1.1 mrg { 1070 1.1 mrg if (!strcmp (argv[ix], "-o") && ix + 1 != argc) 1071 1.1 mrg ++ix; 1072 1.1 mrg else 1073 1.1 mrg { 1074 1.1 mrg if (strcmp (argv[ix] + strlen(argv[ix]) - 2, ".o") == 0) 1075 1.1 mrg { 1076 1.1 mrg char *dbgobj; 1077 1.1 mrg if (save_temps) 1078 1.1 mrg { 1079 1.1 mrg char buf[10]; 1080 1.1 mrg sprintf (buf, "%d", dbgcount++); 1081 1.1 mrg dbgobj = concat (dumppfx, ".mkoffload.dbg", buf, ".o", NULL); 1082 1.1 mrg } 1083 1.1 mrg else 1084 1.1 mrg dbgobj = make_temp_file (".mkoffload.dbg.o"); 1085 1.1 mrg obstack_ptr_grow (&files_to_cleanup, dbgobj); 1086 1.1 mrg 1087 1.1 mrg /* If the copy fails then just ignore it. */ 1088 1.1 mrg if (copy_early_debug_info (argv[ix], dbgobj)) 1089 1.1 mrg { 1090 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, dbgobj); 1091 1.1 mrg obstack_ptr_grow (&files_to_cleanup, dbgobj); 1092 1.1 mrg } 1093 1.1 mrg else 1094 1.1 mrg free (dbgobj); 1095 1.1 mrg } 1096 1.1 mrg } 1097 1.1 mrg } 1098 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name); 1099 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, "-lgomp"); 1100 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, 1101 1.1 mrg (TEST_XNACK (elf_flags) 1102 1.1 mrg ? "-mxnack" : "-mno-xnack")); 1103 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, 1104 1.1 mrg (TEST_SRAM_ECC_ON (elf_flags) ? "-msram-ecc=on" 1105 1.1 mrg : TEST_SRAM_ECC_ANY (elf_flags) ? "-msram-ecc=any" 1106 1.1 mrg : "-msram-ecc=off")); 1107 1.1 mrg if (verbose) 1108 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, "-v"); 1109 1.1 mrg 1110 1.1 mrg for (int i = 1; i < argc; i++) 1111 1.1 mrg if (startswith (argv[i], "-l") 1112 1.1 mrg || startswith (argv[i], "-Wl") 1113 1.1 mrg || startswith (argv[i], "-march")) 1114 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, argv[i]); 1115 1.1 mrg 1116 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpdir"); 1117 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, ""); 1118 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpbase"); 1119 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, hsaco_dumpbase); 1120 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpbase-ext"); 1121 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, ""); 1122 1.1 mrg 1123 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, "-o"); 1124 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, gcn_o_name); 1125 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, NULL); 1126 1.1 mrg const char **ld_argv = XOBFINISH (&ld_argv_obstack, const char **); 1127 1.1 mrg 1128 1.1 mrg /* Clean up unhelpful environment variables. */ 1129 1.1 mrg char *execpath = getenv ("GCC_EXEC_PREFIX"); 1130 1.1 mrg char *cpath = getenv ("COMPILER_PATH"); 1131 1.1 mrg char *lpath = getenv ("LIBRARY_PATH"); 1132 1.1 mrg unsetenv ("GCC_EXEC_PREFIX"); 1133 1.1 mrg unsetenv ("COMPILER_PATH"); 1134 1.1 mrg unsetenv ("LIBRARY_PATH"); 1135 1.1 mrg 1136 1.1 mrg /* Run the compiler pass. */ 1137 1.1 mrg fork_execute (cc_argv[0], CONST_CAST (char **, cc_argv), true, ".gcc_args"); 1138 1.1 mrg obstack_free (&cc_argv_obstack, NULL); 1139 1.1 mrg 1140 1.1 mrg in = fopen (gcn_s1_name, "r"); 1141 1.1 mrg if (!in) 1142 1.1 mrg fatal_error (input_location, "cannot open intermediate gcn asm file"); 1143 1.1 mrg 1144 1.1 mrg out = fopen (gcn_s2_name, "w"); 1145 1.1 mrg if (!out) 1146 1.1 mrg fatal_error (input_location, "cannot open '%s'", gcn_s2_name); 1147 1.1 mrg 1148 1.1 mrg process_asm (in, out, cfile); 1149 1.1 mrg 1150 1.1 mrg fclose (in); 1151 1.1 mrg fclose (out); 1152 1.1 mrg 1153 1.1 mrg /* Run the assemble/link pass. */ 1154 1.1 mrg fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true, ".ld_args"); 1155 1.1 mrg obstack_free (&ld_argv_obstack, NULL); 1156 1.1 mrg 1157 1.1 mrg in = fopen (gcn_o_name, "r"); 1158 1.1 mrg if (!in) 1159 1.1 mrg fatal_error (input_location, "cannot open intermediate gcn obj file"); 1160 1.1 mrg 1161 1.1 mrg process_obj (in, cfile); 1162 1.1 mrg 1163 1.1 mrg fclose (in); 1164 1.1 mrg 1165 1.1 mrg xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL)); 1166 1.1 mrg xputenv (concat ("COMPILER_PATH=", cpath, NULL)); 1167 1.1 mrg xputenv (concat ("LIBRARY_PATH=", lpath, NULL)); 1168 1.1 mrg } 1169 1.1 mrg 1170 1.1 mrg fclose (cfile); 1171 1.1 mrg 1172 1.1 mrg compile_native (gcn_cfile_name, outname, collect_gcc, fPIC, fpic); 1173 1.1 mrg 1174 1.1 mrg return 0; 1175 1.1 mrg } 1176