mkoffload.cc revision 1.1 1 1.1 mrg /* Offload image generation tool for AMD GCN.
2 1.1 mrg
3 1.1 mrg Copyright (C) 2014-2022 Free Software Foundation, Inc.
4 1.1 mrg
5 1.1 mrg This file is part of GCC.
6 1.1 mrg
7 1.1 mrg GCC is free software; you can redistribute it and/or modify it
8 1.1 mrg under the terms of the GNU General Public License as published
9 1.1 mrg by the Free Software Foundation; either version 3, or (at your
10 1.1 mrg option) any later version.
11 1.1 mrg
12 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT
13 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 1.1 mrg License for more details.
16 1.1 mrg
17 1.1 mrg You should have received a copy of the GNU General Public License
18 1.1 mrg along with GCC; see the file COPYING3. If not see
19 1.1 mrg <http://www.gnu.org/licenses/>. */
20 1.1 mrg
21 1.1 mrg /* Munges GCN assembly into a C source file defining the GCN code as a
22 1.1 mrg string.
23 1.1 mrg
24 1.1 mrg This is not a complete assembler. We presume the source is well
25 1.1 mrg formed from the compiler and can die horribly if it is not. */
26 1.1 mrg
27 1.1 mrg #include "config.h"
28 1.1 mrg #include "system.h"
29 1.1 mrg #include "coretypes.h"
30 1.1 mrg #include "obstack.h"
31 1.1 mrg #include "diagnostic.h"
32 1.1 mrg #include "intl.h"
33 1.1 mrg #include <libgen.h>
34 1.1 mrg #include "collect-utils.h"
35 1.1 mrg #include "gomp-constants.h"
36 1.1 mrg #include "simple-object.h"
37 1.1 mrg #include "elf.h"
38 1.1 mrg
39 1.1 mrg /* These probably won't (all) be in elf.h for a while. */
/* ELF e_machine value for AMD GPU objects.  The expansion deliberately has
   no trailing semicolon so that the macro can be used inside expressions
   as well as in plain assignments.  */
#undef EM_AMDGPU
#define EM_AMDGPU 0xe0
42 1.1 mrg
43 1.1 mrg #undef ELFOSABI_AMDGPU_HSA
44 1.1 mrg #define ELFOSABI_AMDGPU_HSA 64
45 1.1 mrg #undef ELFABIVERSION_AMDGPU_HSA_V3
46 1.1 mrg #define ELFABIVERSION_AMDGPU_HSA_V3 1
47 1.1 mrg #undef ELFABIVERSION_AMDGPU_HSA_V4
48 1.1 mrg #define ELFABIVERSION_AMDGPU_HSA_V4 2
49 1.1 mrg
50 1.1 mrg #undef EF_AMDGPU_MACH_AMDGCN_GFX803
51 1.1 mrg #define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a
52 1.1 mrg #undef EF_AMDGPU_MACH_AMDGCN_GFX900
53 1.1 mrg #define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c
54 1.1 mrg #undef EF_AMDGPU_MACH_AMDGCN_GFX906
55 1.1 mrg #define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
56 1.1 mrg #undef EF_AMDGPU_MACH_AMDGCN_GFX908
57 1.1 mrg #define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
58 1.1 mrg
59 1.1 mrg #define EF_AMDGPU_XNACK_V3 0x100
60 1.1 mrg #define EF_AMDGPU_SRAM_ECC_V3 0x200
61 1.1 mrg
62 1.1 mrg #define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask. */
63 1.1 mrg #define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000
64 1.1 mrg #define EF_AMDGPU_FEATURE_XNACK_ANY_V4 0x100
65 1.1 mrg #define EF_AMDGPU_FEATURE_XNACK_OFF_V4 0x200
66 1.1 mrg #define EF_AMDGPU_FEATURE_XNACK_ON_V4 0x300
67 1.1 mrg
68 1.1 mrg #define EF_AMDGPU_FEATURE_SRAMECC_V4 0xc00 /* Mask. */
69 1.1 mrg #define EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4 0x000
70 1.1 mrg #define EF_AMDGPU_FEATURE_SRAMECC_ANY_V4 0x400
71 1.1 mrg #define EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 0x800
72 1.1 mrg #define EF_AMDGPU_FEATURE_SRAMECC_ON_V4 0xc00
73 1.1 mrg
/* Helpers that set or test the XNACK and SRAM-ECC feature fields of an ELF
   e_flags value.  Which encoding is used depends on whether
   HAVE_GCN_ASM_V3_SYNTAX or HAVE_GCN_ASM_V4_SYNTAX is defined.  Every macro
   parenthesizes its argument and its whole expansion so that it behaves
   as a single expression whatever lvalue is passed in.  */
#ifdef HAVE_GCN_ASM_V3_SYNTAX
/* V3 encoding: one bit per feature.  */
#define SET_XNACK_ON(VAR) ((VAR) |= EF_AMDGPU_XNACK_V3)
#define SET_XNACK_OFF(VAR) ((VAR) &= ~EF_AMDGPU_XNACK_V3)
#define TEST_XNACK(VAR) ((VAR) & EF_AMDGPU_XNACK_V3)

#define SET_SRAM_ECC_ON(VAR) ((VAR) |= EF_AMDGPU_SRAM_ECC_V3)
#define SET_SRAM_ECC_ANY(VAR) SET_SRAM_ECC_ON (VAR)
#define SET_SRAM_ECC_OFF(VAR) ((VAR) &= ~EF_AMDGPU_SRAM_ECC_V3)
#define SET_SRAM_ECC_UNSUPPORTED(VAR) SET_SRAM_ECC_OFF (VAR)
#define TEST_SRAM_ECC_ANY(VAR) 0 /* Not supported.  */
#define TEST_SRAM_ECC_ON(VAR) ((VAR) & EF_AMDGPU_SRAM_ECC_V3)
#endif
#ifdef HAVE_GCN_ASM_V4_SYNTAX
/* V4 encoding: each feature is a two-bit field, so a setter first clears
   the field with its mask and then ORs in the requested value.  */
#define SET_XNACK_ON(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_XNACK_V4) \
            | EF_AMDGPU_FEATURE_XNACK_ON_V4))
#define SET_XNACK_OFF(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_XNACK_V4) \
            | EF_AMDGPU_FEATURE_XNACK_OFF_V4))
#define TEST_XNACK(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_XNACK_V4) == EF_AMDGPU_FEATURE_XNACK_ON_V4)

#define SET_SRAM_ECC_ON(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_ON_V4))
#define SET_SRAM_ECC_ANY(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_ANY_V4))
#define SET_SRAM_ECC_OFF(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_OFF_V4))
#define SET_SRAM_ECC_UNSUPPORTED(VAR) \
  ((VAR) = (((VAR) & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
            | EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4))
#define TEST_SRAM_ECC_ANY(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_SRAMECC_V4) == EF_AMDGPU_FEATURE_SRAMECC_ANY_V4)
#define TEST_SRAM_ECC_ON(VAR) \
  (((VAR) & EF_AMDGPU_FEATURE_SRAMECC_V4) == EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
#endif
108 1.1 mrg
109 1.1 mrg #ifndef R_AMDGPU_NONE
110 1.1 mrg #define R_AMDGPU_NONE 0
111 1.1 mrg #define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */
112 1.1 mrg #define R_AMDGPU_ABS32_HI 2 /* (S + A) >> 32 */
113 1.1 mrg #define R_AMDGPU_ABS64 3 /* S + A */
114 1.1 mrg #define R_AMDGPU_REL32 4 /* S + A - P */
115 1.1 mrg #define R_AMDGPU_REL64 5 /* S + A - P */
116 1.1 mrg #define R_AMDGPU_ABS32 6 /* S + A */
117 1.1 mrg #define R_AMDGPU_GOTPCREL 7 /* G + GOT + A - P */
118 1.1 mrg #define R_AMDGPU_GOTPCREL32_LO 8 /* (G + GOT + A - P) & 0xFFFFFFFF */
119 1.1 mrg #define R_AMDGPU_GOTPCREL32_HI 9 /* (G + GOT + A - P) >> 32 */
120 1.1 mrg #define R_AMDGPU_REL32_LO 10 /* (S + A - P) & 0xFFFFFFFF */
121 1.1 mrg #define R_AMDGPU_REL32_HI 11 /* (S + A - P) >> 32 */
122 1.1 mrg #define R_AMDGPU_RELATIVE64 13 /* B + A */
123 1.1 mrg #endif
124 1.1 mrg
125 1.1 mrg const char tool_name[] = "gcn mkoffload";
126 1.1 mrg
127 1.1 mrg static const char *gcn_dumpbase;
128 1.1 mrg static struct obstack files_to_cleanup;
129 1.1 mrg
130 1.1 mrg enum offload_abi offload_abi = OFFLOAD_ABI_UNSET;
131 1.1 mrg uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU architecture.
132 1.1 mrg uint32_t elf_flags =
133 1.1 mrg #ifdef HAVE_GCN_ASM_V3_SYNTAX
134 1.1 mrg 0;
135 1.1 mrg #endif
136 1.1 mrg #ifdef HAVE_GCN_ASM_V4_SYNTAX
137 1.1 mrg (EF_AMDGPU_FEATURE_XNACK_ANY_V4 | EF_AMDGPU_FEATURE_SRAMECC_ANY_V4);
138 1.1 mrg #endif
139 1.1 mrg
140 1.1 mrg /* Delete tempfiles. */
141 1.1 mrg
142 1.1 mrg void
143 1.1 mrg tool_cleanup (bool from_signal ATTRIBUTE_UNUSED)
144 1.1 mrg {
145 1.1 mrg obstack_ptr_grow (&files_to_cleanup, NULL);
146 1.1 mrg const char **files = XOBFINISH (&files_to_cleanup, const char **);
147 1.1 mrg for (int i = 0; files[i]; i++)
148 1.1 mrg maybe_unlink (files[i]);
149 1.1 mrg }
150 1.1 mrg
151 1.1 mrg static void
152 1.1 mrg mkoffload_cleanup (void)
153 1.1 mrg {
154 1.1 mrg tool_cleanup (false);
155 1.1 mrg }
156 1.1 mrg
157 1.1 mrg /* Unlink FILE unless requested otherwise. */
158 1.1 mrg
159 1.1 mrg void
160 1.1 mrg maybe_unlink (const char *file)
161 1.1 mrg {
162 1.1 mrg if (!save_temps)
163 1.1 mrg {
164 1.1 mrg if (unlink_if_ordinary (file) && errno != ENOENT)
165 1.1 mrg fatal_error (input_location, "deleting file %s: %m", file);
166 1.1 mrg }
167 1.1 mrg else if (verbose)
168 1.1 mrg fprintf (stderr, "[Leaving %s]\n", file);
169 1.1 mrg }
170 1.1 mrg
171 1.1 mrg /* Add or change the value of an environment variable, outputting the
172 1.1 mrg change to standard error if in verbose mode. */
173 1.1 mrg
174 1.1 mrg static void
175 1.1 mrg xputenv (const char *string)
176 1.1 mrg {
177 1.1 mrg if (verbose)
178 1.1 mrg fprintf (stderr, "%s\n", string);
179 1.1 mrg putenv (CONST_CAST (char *, string));
180 1.1 mrg }
181 1.1 mrg
182 1.1 mrg /* Read the whole input file. It will be NUL terminated (but
183 1.1 mrg remember, there could be a NUL in the file itself. */
184 1.1 mrg
185 1.1 mrg static const char *
186 1.1 mrg read_file (FILE *stream, size_t *plen)
187 1.1 mrg {
188 1.1 mrg size_t alloc = 16384;
189 1.1 mrg size_t base = 0;
190 1.1 mrg char *buffer;
191 1.1 mrg
192 1.1 mrg if (!fseek (stream, 0, SEEK_END))
193 1.1 mrg {
194 1.1 mrg /* Get the file size. */
195 1.1 mrg long s = ftell (stream);
196 1.1 mrg if (s >= 0)
197 1.1 mrg alloc = s + 100;
198 1.1 mrg fseek (stream, 0, SEEK_SET);
199 1.1 mrg }
200 1.1 mrg buffer = XNEWVEC (char, alloc);
201 1.1 mrg
202 1.1 mrg for (;;)
203 1.1 mrg {
204 1.1 mrg size_t n = fread (buffer + base, 1, alloc - base - 1, stream);
205 1.1 mrg
206 1.1 mrg if (!n)
207 1.1 mrg break;
208 1.1 mrg base += n;
209 1.1 mrg if (base + 1 == alloc)
210 1.1 mrg {
211 1.1 mrg alloc *= 2;
212 1.1 mrg buffer = XRESIZEVEC (char, buffer, alloc);
213 1.1 mrg }
214 1.1 mrg }
215 1.1 mrg buffer[base] = 0;
216 1.1 mrg *plen = base;
217 1.1 mrg return buffer;
218 1.1 mrg }
219 1.1 mrg
220 1.1 mrg /* Parse STR, saving found tokens into PVALUES and return their number.
221 1.1 mrg Tokens are assumed to be delimited by ':'. */
222 1.1 mrg
223 1.1 mrg static unsigned
224 1.1 mrg parse_env_var (const char *str, char ***pvalues)
225 1.1 mrg {
226 1.1 mrg const char *curval, *nextval;
227 1.1 mrg char **values;
228 1.1 mrg unsigned num = 1, i;
229 1.1 mrg
230 1.1 mrg curval = strchr (str, ':');
231 1.1 mrg while (curval)
232 1.1 mrg {
233 1.1 mrg num++;
234 1.1 mrg curval = strchr (curval + 1, ':');
235 1.1 mrg }
236 1.1 mrg
237 1.1 mrg values = (char **) xmalloc (num * sizeof (char *));
238 1.1 mrg curval = str;
239 1.1 mrg nextval = strchr (curval, ':');
240 1.1 mrg if (nextval == NULL)
241 1.1 mrg nextval = strchr (curval, '\0');
242 1.1 mrg
243 1.1 mrg for (i = 0; i < num; i++)
244 1.1 mrg {
245 1.1 mrg int l = nextval - curval;
246 1.1 mrg values[i] = (char *) xmalloc (l + 1);
247 1.1 mrg memcpy (values[i], curval, l);
248 1.1 mrg values[i][l] = 0;
249 1.1 mrg curval = nextval + 1;
250 1.1 mrg nextval = strchr (curval, ':');
251 1.1 mrg if (nextval == NULL)
252 1.1 mrg nextval = strchr (curval, '\0');
253 1.1 mrg }
254 1.1 mrg *pvalues = values;
255 1.1 mrg return num;
256 1.1 mrg }
257 1.1 mrg
/* Release an array of heap pointers: free up to N elements of PTR and
   then PTR itself.  Nothing happens when PTR is NULL.  Freeing of
   elements stops at the first NULL element; anything after it is left
   untouched.  */

static void
free_array_of_ptrs (void **ptr, unsigned n)
{
  if (ptr == NULL)
    return;

  for (unsigned idx = 0; idx < n && ptr[idx] != NULL; idx++)
    free (ptr[idx]);

  free (ptr);
}
277 1.1 mrg
/* Like access (NAME, MODE), except that when MODE is X_OK a directory
   (or a name that cannot be stat'ed) is reported as inaccessible.
   Returns 0 on success and -1 on failure.  */

static int
access_check (const char *name, int mode)
{
  if (mode == X_OK)
    {
      struct stat info;

      if (stat (name, &info) < 0)
        return -1;
      if (S_ISDIR (info.st_mode))
        return -1;
    }

  return access (name, mode);
}
294 1.1 mrg
295 1.1 mrg /* Copy the early-debug-info from the incoming LTO object to a new object
296 1.1 mrg that will be linked into the output HSACO file. The host relocations
297 1.1 mrg must be translated into GCN relocations, and any global undefined symbols
298 1.1 mrg must be weakened (so as not to have the debug info try to pull in host
299 1.1 mrg junk).
300 1.1 mrg
301 1.1 mrg Returns true if the file was created, false otherwise. */
302 1.1 mrg
303 1.1 mrg static bool
304 1.1 mrg copy_early_debug_info (const char *infile, const char *outfile)
305 1.1 mrg {
306 1.1 mrg const char *errmsg;
307 1.1 mrg int err;
308 1.1 mrg
309 1.1 mrg /* The simple_object code can handle extracting the debug sections.
310 1.1 mrg This code is based on that in lto-wrapper.cc. */
311 1.1 mrg int infd = open (infile, O_RDONLY | O_BINARY);
312 1.1 mrg if (infd == -1)
313 1.1 mrg return false;
314 1.1 mrg simple_object_read *inobj = simple_object_start_read (infd, 0,
315 1.1 mrg "__GNU_LTO",
316 1.1 mrg &errmsg, &err);
317 1.1 mrg if (!inobj)
318 1.1 mrg return false;
319 1.1 mrg
320 1.1 mrg off_t off, len;
321 1.1 mrg if (simple_object_find_section (inobj, ".gnu.debuglto_.debug_info",
322 1.1 mrg &off, &len, &errmsg, &err) != 1)
323 1.1 mrg {
324 1.1 mrg simple_object_release_read (inobj);
325 1.1 mrg close (infd);
326 1.1 mrg return false;
327 1.1 mrg }
328 1.1 mrg
329 1.1 mrg errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err, true);
330 1.1 mrg if (errmsg)
331 1.1 mrg {
332 1.1 mrg unlink_if_ordinary (outfile);
333 1.1 mrg return false;
334 1.1 mrg }
335 1.1 mrg
336 1.1 mrg simple_object_release_read (inobj);
337 1.1 mrg close (infd);
338 1.1 mrg
339 1.1 mrg /* Open the file we just created for some adjustments.
340 1.1 mrg The simple_object code can't do this, so we do it manually. */
341 1.1 mrg FILE *outfd = fopen (outfile, "r+b");
342 1.1 mrg if (!outfd)
343 1.1 mrg return false;
344 1.1 mrg
345 1.1 mrg Elf64_Ehdr ehdr;
346 1.1 mrg if (fread (&ehdr, sizeof (ehdr), 1, outfd) != 1)
347 1.1 mrg {
348 1.1 mrg fclose (outfd);
349 1.1 mrg return true;
350 1.1 mrg }
351 1.1 mrg
352 1.1 mrg /* We only support host relocations of x86_64, for now. */
353 1.1 mrg gcc_assert (ehdr.e_machine == EM_X86_64);
354 1.1 mrg
355 1.1 mrg /* Fiji devices use HSACOv3 regardless of the assembler. */
356 1.1 mrg uint32_t elf_flags_actual = (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX803
357 1.1 mrg ? 0 : elf_flags);
358 1.1 mrg /* GFX900 devices don't support the sramecc attribute even if
359 1.1 mrg a buggy assembler thinks it does. This must match gcn-hsa.h */
360 1.1 mrg if (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX900)
361 1.1 mrg SET_SRAM_ECC_UNSUPPORTED (elf_flags_actual);
362 1.1 mrg
363 1.1 mrg /* Patch the correct elf architecture flag into the file. */
364 1.1 mrg ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA;
365 1.1 mrg #ifdef HAVE_GCN_ASM_V3_SYNTAX
366 1.1 mrg ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA_V3;
367 1.1 mrg #endif
368 1.1 mrg #ifdef HAVE_GCN_ASM_V4_SYNTAX
369 1.1 mrg ehdr.e_ident[8] = (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX803
370 1.1 mrg ? ELFABIVERSION_AMDGPU_HSA_V3
371 1.1 mrg : ELFABIVERSION_AMDGPU_HSA_V4);
372 1.1 mrg #endif
373 1.1 mrg ehdr.e_type = ET_REL;
374 1.1 mrg ehdr.e_machine = EM_AMDGPU;
375 1.1 mrg ehdr.e_flags = elf_arch | elf_flags_actual;
376 1.1 mrg
377 1.1 mrg /* Load the section headers so we can walk them later. */
378 1.1 mrg Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr)
379 1.1 mrg * ehdr.e_shnum);
380 1.1 mrg if (fseek (outfd, ehdr.e_shoff, SEEK_SET) == -1
381 1.1 mrg || fread (sections, sizeof (Elf64_Shdr), ehdr.e_shnum,
382 1.1 mrg outfd) != ehdr.e_shnum)
383 1.1 mrg {
384 1.1 mrg free (sections);
385 1.1 mrg fclose (outfd);
386 1.1 mrg return true;
387 1.1 mrg }
388 1.1 mrg
389 1.1 mrg /* Convert the host relocations to target relocations. */
390 1.1 mrg for (int i = 0; i < ehdr.e_shnum; i++)
391 1.1 mrg {
392 1.1 mrg if (sections[i].sh_type != SHT_RELA)
393 1.1 mrg continue;
394 1.1 mrg
395 1.1 mrg char *data = (char *)xmalloc (sections[i].sh_size);
396 1.1 mrg if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1
397 1.1 mrg || fread (data, sections[i].sh_size, 1, outfd) != 1)
398 1.1 mrg {
399 1.1 mrg free (data);
400 1.1 mrg continue;
401 1.1 mrg }
402 1.1 mrg
403 1.1 mrg for (size_t offset = 0;
404 1.1 mrg offset < sections[i].sh_size;
405 1.1 mrg offset += sections[i].sh_entsize)
406 1.1 mrg {
407 1.1 mrg Elf64_Rela *reloc = (Elf64_Rela *) (data + offset);
408 1.1 mrg
409 1.1 mrg /* Map the host relocations to GCN relocations.
410 1.1 mrg Only relocations that can appear in DWARF need be handled. */
411 1.1 mrg switch (ELF64_R_TYPE (reloc->r_info))
412 1.1 mrg {
413 1.1 mrg case R_X86_64_32:
414 1.1 mrg case R_X86_64_32S:
415 1.1 mrg reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info),
416 1.1 mrg R_AMDGPU_ABS32);
417 1.1 mrg break;
418 1.1 mrg case R_X86_64_PC32:
419 1.1 mrg reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info),
420 1.1 mrg R_AMDGPU_REL32);
421 1.1 mrg break;
422 1.1 mrg case R_X86_64_PC64:
423 1.1 mrg reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info),
424 1.1 mrg R_AMDGPU_REL64);
425 1.1 mrg break;
426 1.1 mrg case R_X86_64_64:
427 1.1 mrg reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info),
428 1.1 mrg R_AMDGPU_ABS64);
429 1.1 mrg break;
430 1.1 mrg case R_X86_64_RELATIVE:
431 1.1 mrg reloc->r_info = ELF32_R_INFO(ELF32_R_SYM(reloc->r_info),
432 1.1 mrg R_AMDGPU_RELATIVE64);
433 1.1 mrg break;
434 1.1 mrg default:
435 1.1 mrg gcc_unreachable ();
436 1.1 mrg }
437 1.1 mrg }
438 1.1 mrg
439 1.1 mrg /* Write back our relocation changes. */
440 1.1 mrg if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1)
441 1.1 mrg fwrite (data, sections[i].sh_size, 1, outfd);
442 1.1 mrg
443 1.1 mrg free (data);
444 1.1 mrg }
445 1.1 mrg
446 1.1 mrg /* Weaken any global undefined symbols that would pull in unwanted
447 1.1 mrg objects. */
448 1.1 mrg for (int i = 0; i < ehdr.e_shnum; i++)
449 1.1 mrg {
450 1.1 mrg if (sections[i].sh_type != SHT_SYMTAB)
451 1.1 mrg continue;
452 1.1 mrg
453 1.1 mrg char *data = (char *)xmalloc (sections[i].sh_size);
454 1.1 mrg if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1
455 1.1 mrg || fread (data, sections[i].sh_size, 1, outfd) != 1)
456 1.1 mrg {
457 1.1 mrg free (data);
458 1.1 mrg continue;
459 1.1 mrg }
460 1.1 mrg
461 1.1 mrg for (size_t offset = 0;
462 1.1 mrg offset < sections[i].sh_size;
463 1.1 mrg offset += sections[i].sh_entsize)
464 1.1 mrg {
465 1.1 mrg Elf64_Sym *sym = (Elf64_Sym *) (data + offset);
466 1.1 mrg int type = ELF64_ST_TYPE (sym->st_info);
467 1.1 mrg int bind = ELF64_ST_BIND (sym->st_info);
468 1.1 mrg
469 1.1 mrg if (bind == STB_GLOBAL && sym->st_shndx == 0)
470 1.1 mrg sym->st_info = ELF64_ST_INFO (STB_WEAK, type);
471 1.1 mrg }
472 1.1 mrg
473 1.1 mrg /* Write back our symbol changes. */
474 1.1 mrg if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1)
475 1.1 mrg fwrite (data, sections[i].sh_size, 1, outfd);
476 1.1 mrg
477 1.1 mrg free (data);
478 1.1 mrg }
479 1.1 mrg free (sections);
480 1.1 mrg
481 1.1 mrg /* Write back our header changes. */
482 1.1 mrg rewind (outfd);
483 1.1 mrg fwrite (&ehdr, sizeof (ehdr), 1, outfd);
484 1.1 mrg
485 1.1 mrg fclose (outfd);
486 1.1 mrg return true;
487 1.1 mrg }
488 1.1 mrg
489 1.1 mrg /* Parse an input assembler file, extract the offload tables etc.,
490 1.1 mrg and output (1) the assembler code, minus the tables (which can contain
491 1.1 mrg problematic relocations), and (2) a C file with the offload tables
492 1.1 mrg encoded as structured data. */
493 1.1 mrg
494 1.1 mrg static void
495 1.1 mrg process_asm (FILE *in, FILE *out, FILE *cfile)
496 1.1 mrg {
497 1.1 mrg int fn_count = 0, var_count = 0, dims_count = 0, regcount_count = 0;
498 1.1 mrg struct obstack fns_os, dims_os, regcounts_os;
499 1.1 mrg obstack_init (&fns_os);
500 1.1 mrg obstack_init (&dims_os);
501 1.1 mrg obstack_init (®counts_os);
502 1.1 mrg
503 1.1 mrg struct oaccdims
504 1.1 mrg {
505 1.1 mrg int d[3];
506 1.1 mrg char *name;
507 1.1 mrg } dim;
508 1.1 mrg
509 1.1 mrg struct regcount
510 1.1 mrg {
511 1.1 mrg int sgpr_count;
512 1.1 mrg int vgpr_count;
513 1.1 mrg char *kernel_name;
514 1.1 mrg } regcount = { -1, -1, NULL };
515 1.1 mrg
516 1.1 mrg /* Always add _init_array and _fini_array as kernels. */
517 1.1 mrg obstack_ptr_grow (&fns_os, xstrdup ("_init_array"));
518 1.1 mrg obstack_ptr_grow (&fns_os, xstrdup ("_fini_array"));
519 1.1 mrg fn_count += 2;
520 1.1 mrg
521 1.1 mrg char buf[1000];
522 1.1 mrg enum
523 1.1 mrg { IN_CODE,
524 1.1 mrg IN_METADATA,
525 1.1 mrg IN_VARS,
526 1.1 mrg IN_FUNCS
527 1.1 mrg } state = IN_CODE;
528 1.1 mrg while (fgets (buf, sizeof (buf), in))
529 1.1 mrg {
530 1.1 mrg switch (state)
531 1.1 mrg {
532 1.1 mrg case IN_CODE:
533 1.1 mrg {
534 1.1 mrg if (sscanf (buf, " ;; OPENACC-DIMS: %d, %d, %d : %ms\n",
535 1.1 mrg &dim.d[0], &dim.d[1], &dim.d[2], &dim.name) == 4)
536 1.1 mrg {
537 1.1 mrg obstack_grow (&dims_os, &dim, sizeof (dim));
538 1.1 mrg dims_count++;
539 1.1 mrg }
540 1.1 mrg
541 1.1 mrg break;
542 1.1 mrg }
543 1.1 mrg case IN_METADATA:
544 1.1 mrg {
545 1.1 mrg if (sscanf (buf, " - .name: %ms\n", ®count.kernel_name) == 1)
546 1.1 mrg break;
547 1.1 mrg else if (sscanf (buf, " .sgpr_count: %d\n",
548 1.1 mrg ®count.sgpr_count) == 1)
549 1.1 mrg {
550 1.1 mrg gcc_assert (regcount.kernel_name);
551 1.1 mrg break;
552 1.1 mrg }
553 1.1 mrg else if (sscanf (buf, " .vgpr_count: %d\n",
554 1.1 mrg ®count.vgpr_count) == 1)
555 1.1 mrg {
556 1.1 mrg gcc_assert (regcount.kernel_name);
557 1.1 mrg break;
558 1.1 mrg }
559 1.1 mrg
560 1.1 mrg break;
561 1.1 mrg }
562 1.1 mrg case IN_VARS:
563 1.1 mrg {
564 1.1 mrg char *varname;
565 1.1 mrg unsigned varsize;
566 1.1 mrg if (sscanf (buf, " .8byte %ms\n", &varname))
567 1.1 mrg {
568 1.1 mrg fputs (buf, out);
569 1.1 mrg fgets (buf, sizeof (buf), in);
570 1.1 mrg if (!sscanf (buf, " .8byte %u\n", &varsize))
571 1.1 mrg abort ();
572 1.1 mrg var_count++;
573 1.1 mrg }
574 1.1 mrg break;
575 1.1 mrg }
576 1.1 mrg case IN_FUNCS:
577 1.1 mrg {
578 1.1 mrg char *funcname;
579 1.1 mrg if (sscanf (buf, "\t.8byte\t%ms\n", &funcname))
580 1.1 mrg {
581 1.1 mrg obstack_ptr_grow (&fns_os, funcname);
582 1.1 mrg fn_count++;
583 1.1 mrg continue;
584 1.1 mrg }
585 1.1 mrg break;
586 1.1 mrg }
587 1.1 mrg }
588 1.1 mrg
589 1.1 mrg char dummy;
590 1.1 mrg if (sscanf (buf, " .section .gnu.offload_vars%c", &dummy) > 0)
591 1.1 mrg {
592 1.1 mrg state = IN_VARS;
593 1.1 mrg
594 1.1 mrg /* Add a global symbol to allow plugin-gcn.c to locate the table
595 1.1 mrg at runtime. It can't use the "offload_var_table.N" emitted by
596 1.1 mrg the compiler because a) they're not global, and b) there's one
597 1.1 mrg for each input file combined into the binary. */
598 1.1 mrg fputs (buf, out);
599 1.1 mrg fputs ("\t.global .offload_var_table\n"
600 1.1 mrg "\t.type .offload_var_table, @object\n"
601 1.1 mrg ".offload_var_table:\n",
602 1.1 mrg out);
603 1.1 mrg }
604 1.1 mrg else if (sscanf (buf, " .section .gnu.offload_funcs%c", &dummy) > 0)
605 1.1 mrg state = IN_FUNCS;
606 1.1 mrg else if (sscanf (buf, " .amdgpu_metadata%c", &dummy) > 0)
607 1.1 mrg {
608 1.1 mrg state = IN_METADATA;
609 1.1 mrg regcount.kernel_name = NULL;
610 1.1 mrg regcount.sgpr_count = regcount.vgpr_count = -1;
611 1.1 mrg }
612 1.1 mrg else if (sscanf (buf, " .section %c", &dummy) > 0
613 1.1 mrg || sscanf (buf, " .text%c", &dummy) > 0
614 1.1 mrg || sscanf (buf, " .bss%c", &dummy) > 0
615 1.1 mrg || sscanf (buf, " .data%c", &dummy) > 0
616 1.1 mrg || sscanf (buf, " .ident %c", &dummy) > 0)
617 1.1 mrg state = IN_CODE;
618 1.1 mrg else if (sscanf (buf, " .end_amdgpu_metadata%c", &dummy) > 0)
619 1.1 mrg {
620 1.1 mrg state = IN_CODE;
621 1.1 mrg gcc_assert (regcount.kernel_name != NULL
622 1.1 mrg && regcount.sgpr_count >= 0
623 1.1 mrg && regcount.vgpr_count >= 0);
624 1.1 mrg obstack_grow (®counts_os, ®count, sizeof (regcount));
625 1.1 mrg regcount_count++;
626 1.1 mrg regcount.kernel_name = NULL;
627 1.1 mrg regcount.sgpr_count = regcount.vgpr_count = -1;
628 1.1 mrg }
629 1.1 mrg
630 1.1 mrg if (state == IN_CODE || state == IN_METADATA || state == IN_VARS)
631 1.1 mrg fputs (buf, out);
632 1.1 mrg }
633 1.1 mrg
634 1.1 mrg char **fns = XOBFINISH (&fns_os, char **);
635 1.1 mrg struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *);
636 1.1 mrg struct regcount *regcounts = XOBFINISH (®counts_os, struct regcount *);
637 1.1 mrg
638 1.1 mrg fprintf (cfile, "#include <stdlib.h>\n");
639 1.1 mrg fprintf (cfile, "#include <stdbool.h>\n\n");
640 1.1 mrg
641 1.1 mrg fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count);
642 1.1 mrg
643 1.1 mrg /* Dump out function idents. */
644 1.1 mrg fprintf (cfile, "static const struct hsa_kernel_description {\n"
645 1.1 mrg " const char *name;\n"
646 1.1 mrg " int oacc_dims[3];\n"
647 1.1 mrg " int sgpr_count;\n"
648 1.1 mrg " int vgpr_count;\n"
649 1.1 mrg "} gcn_kernels[] = {\n ");
650 1.1 mrg dim.d[0] = dim.d[1] = dim.d[2] = 0;
651 1.1 mrg const char *comma;
652 1.1 mrg int i;
653 1.1 mrg for (comma = "", i = 0; i < fn_count; comma = ",\n ", i++)
654 1.1 mrg {
655 1.1 mrg /* Find if we recorded dimensions for this function. */
656 1.1 mrg int *d = dim.d; /* Previously zeroed. */
657 1.1 mrg int sgpr_count = 0;
658 1.1 mrg int vgpr_count = 0;
659 1.1 mrg for (int j = 0; j < dims_count; j++)
660 1.1 mrg if (strcmp (fns[i], dims[j].name) == 0)
661 1.1 mrg {
662 1.1 mrg d = dims[j].d;
663 1.1 mrg break;
664 1.1 mrg }
665 1.1 mrg for (int j = 0; j < regcount_count; j++)
666 1.1 mrg if (strcmp (fns[i], regcounts[j].kernel_name) == 0)
667 1.1 mrg {
668 1.1 mrg sgpr_count = regcounts[j].sgpr_count;
669 1.1 mrg vgpr_count = regcounts[j].vgpr_count;
670 1.1 mrg break;
671 1.1 mrg }
672 1.1 mrg
673 1.1 mrg fprintf (cfile, "%s{\"%s\", {%d, %d, %d}, %d, %d}", comma,
674 1.1 mrg fns[i], d[0], d[1], d[2], sgpr_count, vgpr_count);
675 1.1 mrg
676 1.1 mrg free (fns[i]);
677 1.1 mrg }
678 1.1 mrg fprintf (cfile, "\n};\n\n");
679 1.1 mrg
680 1.1 mrg obstack_free (&fns_os, NULL);
681 1.1 mrg for (i = 0; i < dims_count; i++)
682 1.1 mrg free (dims[i].name);
683 1.1 mrg for (i = 0; i < regcount_count; i++)
684 1.1 mrg free (regcounts[i].kernel_name);
685 1.1 mrg obstack_free (&dims_os, NULL);
686 1.1 mrg obstack_free (®counts_os, NULL);
687 1.1 mrg }
688 1.1 mrg
689 1.1 mrg /* Embed an object file into a C source file. */
690 1.1 mrg
691 1.1 mrg static void
692 1.1 mrg process_obj (FILE *in, FILE *cfile)
693 1.1 mrg {
694 1.1 mrg size_t len = 0;
695 1.1 mrg const char *input = read_file (in, &len);
696 1.1 mrg
697 1.1 mrg /* Dump out an array containing the binary.
698 1.1 mrg FIXME: do this with objcopy. */
699 1.1 mrg fprintf (cfile, "static unsigned char gcn_code[] = {");
700 1.1 mrg for (size_t i = 0; i < len; i += 17)
701 1.1 mrg {
702 1.1 mrg fprintf (cfile, "\n\t");
703 1.1 mrg for (size_t j = i; j < i + 17 && j < len; j++)
704 1.1 mrg fprintf (cfile, "%3u,", (unsigned char) input[j]);
705 1.1 mrg }
706 1.1 mrg fprintf (cfile, "\n};\n\n");
707 1.1 mrg
708 1.1 mrg fprintf (cfile,
709 1.1 mrg "static const struct gcn_image {\n"
710 1.1 mrg " size_t size;\n"
711 1.1 mrg " void *image;\n"
712 1.1 mrg "} gcn_image = {\n"
713 1.1 mrg " %zu,\n"
714 1.1 mrg " gcn_code\n"
715 1.1 mrg "};\n\n",
716 1.1 mrg len);
717 1.1 mrg
718 1.1 mrg fprintf (cfile,
719 1.1 mrg "static const struct gcn_image_desc {\n"
720 1.1 mrg " const struct gcn_image *gcn_image;\n"
721 1.1 mrg " unsigned kernel_count;\n"
722 1.1 mrg " const struct hsa_kernel_description *kernel_infos;\n"
723 1.1 mrg " unsigned global_variable_count;\n"
724 1.1 mrg "} target_data = {\n"
725 1.1 mrg " &gcn_image,\n"
726 1.1 mrg " sizeof (gcn_kernels) / sizeof (gcn_kernels[0]),\n"
727 1.1 mrg " gcn_kernels,\n"
728 1.1 mrg " gcn_num_vars\n"
729 1.1 mrg "};\n\n");
730 1.1 mrg
731 1.1 mrg fprintf (cfile,
732 1.1 mrg "#ifdef __cplusplus\n"
733 1.1 mrg "extern \"C\" {\n"
734 1.1 mrg "#endif\n"
735 1.1 mrg "extern void GOMP_offload_register_ver"
736 1.1 mrg " (unsigned, const void *, int, const void *);\n"
737 1.1 mrg "extern void GOMP_offload_unregister_ver"
738 1.1 mrg " (unsigned, const void *, int, const void *);\n"
739 1.1 mrg "#ifdef __cplusplus\n"
740 1.1 mrg "}\n"
741 1.1 mrg "#endif\n\n");
742 1.1 mrg
743 1.1 mrg fprintf (cfile, "extern const void *const __OFFLOAD_TABLE__[];\n\n");
744 1.1 mrg
745 1.1 mrg fprintf (cfile, "static __attribute__((constructor)) void init (void)\n"
746 1.1 mrg "{\n"
747 1.1 mrg " GOMP_offload_register_ver (%#x, __OFFLOAD_TABLE__,"
748 1.1 mrg " %d/*GCN*/, &target_data);\n"
749 1.1 mrg "};\n",
750 1.1 mrg GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_GCN),
751 1.1 mrg GOMP_DEVICE_GCN);
752 1.1 mrg
753 1.1 mrg fprintf (cfile, "static __attribute__((destructor)) void fini (void)\n"
754 1.1 mrg "{\n"
755 1.1 mrg " GOMP_offload_unregister_ver (%#x, __OFFLOAD_TABLE__,"
756 1.1 mrg " %d/*GCN*/, &target_data);\n"
757 1.1 mrg "};\n",
758 1.1 mrg GOMP_VERSION_PACK (GOMP_VERSION, GOMP_VERSION_GCN),
759 1.1 mrg GOMP_DEVICE_GCN);
760 1.1 mrg }
761 1.1 mrg
762 1.1 mrg /* Compile a C file using the host compiler. */
763 1.1 mrg
764 1.1 mrg static void
765 1.1 mrg compile_native (const char *infile, const char *outfile, const char *compiler,
766 1.1 mrg bool fPIC, bool fpic)
767 1.1 mrg {
768 1.1 mrg const char *collect_gcc_options = getenv ("COLLECT_GCC_OPTIONS");
769 1.1 mrg if (!collect_gcc_options)
770 1.1 mrg fatal_error (input_location,
771 1.1 mrg "environment variable COLLECT_GCC_OPTIONS must be set");
772 1.1 mrg
773 1.1 mrg struct obstack argv_obstack;
774 1.1 mrg obstack_init (&argv_obstack);
775 1.1 mrg obstack_ptr_grow (&argv_obstack, compiler);
776 1.1 mrg if (fPIC)
777 1.1 mrg obstack_ptr_grow (&argv_obstack, "-fPIC");
778 1.1 mrg if (fpic)
779 1.1 mrg obstack_ptr_grow (&argv_obstack, "-fpic");
780 1.1 mrg if (save_temps)
781 1.1 mrg obstack_ptr_grow (&argv_obstack, "-save-temps");
782 1.1 mrg if (verbose)
783 1.1 mrg obstack_ptr_grow (&argv_obstack, "-v");
784 1.1 mrg obstack_ptr_grow (&argv_obstack, "-dumpdir");
785 1.1 mrg obstack_ptr_grow (&argv_obstack, "");
786 1.1 mrg obstack_ptr_grow (&argv_obstack, "-dumpbase");
787 1.1 mrg obstack_ptr_grow (&argv_obstack, gcn_dumpbase);
788 1.1 mrg obstack_ptr_grow (&argv_obstack, "-dumpbase-ext");
789 1.1 mrg obstack_ptr_grow (&argv_obstack, ".c");
790 1.1 mrg switch (offload_abi)
791 1.1 mrg {
792 1.1 mrg case OFFLOAD_ABI_LP64:
793 1.1 mrg obstack_ptr_grow (&argv_obstack, "-m64");
794 1.1 mrg break;
795 1.1 mrg case OFFLOAD_ABI_ILP32:
796 1.1 mrg obstack_ptr_grow (&argv_obstack, "-m32");
797 1.1 mrg break;
798 1.1 mrg default:
799 1.1 mrg gcc_unreachable ();
800 1.1 mrg }
801 1.1 mrg obstack_ptr_grow (&argv_obstack, infile);
802 1.1 mrg obstack_ptr_grow (&argv_obstack, "-c");
803 1.1 mrg obstack_ptr_grow (&argv_obstack, "-o");
804 1.1 mrg obstack_ptr_grow (&argv_obstack, outfile);
805 1.1 mrg obstack_ptr_grow (&argv_obstack, NULL);
806 1.1 mrg
807 1.1 mrg const char **new_argv = XOBFINISH (&argv_obstack, const char **);
808 1.1 mrg fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true,
809 1.1 mrg ".gccnative_args");
810 1.1 mrg obstack_free (&argv_obstack, NULL);
811 1.1 mrg }
812 1.1 mrg
813 1.1 mrg int
814 1.1 mrg main (int argc, char **argv)
815 1.1 mrg {
816 1.1 mrg FILE *in = stdin;
817 1.1 mrg FILE *out = stdout;
818 1.1 mrg FILE *cfile = stdout;
819 1.1 mrg const char *outname = 0;
820 1.1 mrg
821 1.1 mrg progname = "mkoffload";
822 1.1 mrg diagnostic_initialize (global_dc, 0);
823 1.1 mrg
824 1.1 mrg obstack_init (&files_to_cleanup);
825 1.1 mrg if (atexit (mkoffload_cleanup) != 0)
826 1.1 mrg fatal_error (input_location, "atexit failed");
827 1.1 mrg
828 1.1 mrg char *collect_gcc = getenv ("COLLECT_GCC");
829 1.1 mrg if (collect_gcc == NULL)
830 1.1 mrg fatal_error (input_location, "COLLECT_GCC must be set.");
831 1.1 mrg const char *gcc_path = dirname (ASTRDUP (collect_gcc));
832 1.1 mrg const char *gcc_exec = basename (ASTRDUP (collect_gcc));
833 1.1 mrg
834 1.1 mrg size_t len = (strlen (gcc_path) + 1 + strlen (GCC_INSTALL_NAME) + 1);
835 1.1 mrg char *driver = XALLOCAVEC (char, len);
836 1.1 mrg
837 1.1 mrg if (strcmp (gcc_exec, collect_gcc) == 0)
838 1.1 mrg /* collect_gcc has no path, so it was found in PATH. Make sure we also
839 1.1 mrg find accel-gcc in PATH. */
840 1.1 mrg gcc_path = NULL;
841 1.1 mrg
842 1.1 mrg int driver_used = 0;
843 1.1 mrg if (gcc_path != NULL)
844 1.1 mrg driver_used = sprintf (driver, "%s/", gcc_path);
845 1.1 mrg sprintf (driver + driver_used, "%s", GCC_INSTALL_NAME);
846 1.1 mrg
847 1.1 mrg bool found = false;
848 1.1 mrg if (gcc_path == NULL)
849 1.1 mrg found = true;
850 1.1 mrg else if (access_check (driver, X_OK) == 0)
851 1.1 mrg found = true;
852 1.1 mrg else
853 1.1 mrg {
854 1.1 mrg /* Don't use alloca pointer with XRESIZEVEC. */
855 1.1 mrg driver = NULL;
856 1.1 mrg /* Look in all COMPILER_PATHs for GCC_INSTALL_NAME. */
857 1.1 mrg char **paths = NULL;
858 1.1 mrg unsigned n_paths;
859 1.1 mrg n_paths = parse_env_var (getenv ("COMPILER_PATH"), &paths);
860 1.1 mrg for (unsigned i = 0; i < n_paths; i++)
861 1.1 mrg {
862 1.1 mrg len = strlen (paths[i]) + 1 + strlen (GCC_INSTALL_NAME) + 1;
863 1.1 mrg driver = XRESIZEVEC (char, driver, len);
864 1.1 mrg sprintf (driver, "%s/%s", paths[i], GCC_INSTALL_NAME);
865 1.1 mrg if (access_check (driver, X_OK) == 0)
866 1.1 mrg {
867 1.1 mrg found = true;
868 1.1 mrg break;
869 1.1 mrg }
870 1.1 mrg }
871 1.1 mrg free_array_of_ptrs ((void **) paths, n_paths);
872 1.1 mrg }
873 1.1 mrg
874 1.1 mrg if (!found)
875 1.1 mrg fatal_error (input_location,
876 1.1 mrg "offload compiler %s not found", GCC_INSTALL_NAME);
877 1.1 mrg
878 1.1 mrg /* We may be called with all the arguments stored in some file and
879 1.1 mrg passed with @file. Expand them into argv before processing. */
880 1.1 mrg expandargv (&argc, &argv);
881 1.1 mrg
882 1.1 mrg /* Scan the argument vector. */
883 1.1 mrg bool fopenmp = false;
884 1.1 mrg bool fopenacc = false;
885 1.1 mrg bool fPIC = false;
886 1.1 mrg bool fpic = false;
887 1.1 mrg bool sram_seen = false;
888 1.1 mrg for (int i = 1; i < argc; i++)
889 1.1 mrg {
890 1.1 mrg #define STR "-foffload-abi="
891 1.1 mrg if (startswith (argv[i], STR))
892 1.1 mrg {
893 1.1 mrg if (strcmp (argv[i] + strlen (STR), "lp64") == 0)
894 1.1 mrg offload_abi = OFFLOAD_ABI_LP64;
895 1.1 mrg else if (strcmp (argv[i] + strlen (STR), "ilp32") == 0)
896 1.1 mrg offload_abi = OFFLOAD_ABI_ILP32;
897 1.1 mrg else
898 1.1 mrg fatal_error (input_location,
899 1.1 mrg "unrecognizable argument of option " STR);
900 1.1 mrg }
901 1.1 mrg #undef STR
902 1.1 mrg else if (strcmp (argv[i], "-fopenmp") == 0)
903 1.1 mrg fopenmp = true;
904 1.1 mrg else if (strcmp (argv[i], "-fopenacc") == 0)
905 1.1 mrg fopenacc = true;
906 1.1 mrg else if (strcmp (argv[i], "-fPIC") == 0)
907 1.1 mrg fPIC = true;
908 1.1 mrg else if (strcmp (argv[i], "-fpic") == 0)
909 1.1 mrg fpic = true;
910 1.1 mrg else if (strcmp (argv[i], "-mxnack") == 0)
911 1.1 mrg SET_XNACK_ON (elf_flags);
912 1.1 mrg else if (strcmp (argv[i], "-mno-xnack") == 0)
913 1.1 mrg SET_XNACK_OFF (elf_flags);
914 1.1 mrg else if (strcmp (argv[i], "-msram-ecc=on") == 0)
915 1.1 mrg {
916 1.1 mrg SET_SRAM_ECC_ON (elf_flags);
917 1.1 mrg sram_seen = true;
918 1.1 mrg }
919 1.1 mrg else if (strcmp (argv[i], "-msram-ecc=any") == 0)
920 1.1 mrg {
921 1.1 mrg SET_SRAM_ECC_ANY (elf_flags);
922 1.1 mrg sram_seen = true;
923 1.1 mrg }
924 1.1 mrg else if (strcmp (argv[i], "-msram-ecc=off") == 0)
925 1.1 mrg {
926 1.1 mrg SET_SRAM_ECC_OFF (elf_flags);
927 1.1 mrg sram_seen = true;
928 1.1 mrg }
929 1.1 mrg else if (strcmp (argv[i], "-save-temps") == 0)
930 1.1 mrg save_temps = true;
931 1.1 mrg else if (strcmp (argv[i], "-v") == 0)
932 1.1 mrg verbose = true;
933 1.1 mrg else if (strcmp (argv[i], "-dumpbase") == 0
934 1.1 mrg && i + 1 < argc)
935 1.1 mrg dumppfx = argv[++i];
936 1.1 mrg else if (strcmp (argv[i], "-march=fiji") == 0)
937 1.1 mrg elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803;
938 1.1 mrg else if (strcmp (argv[i], "-march=gfx900") == 0)
939 1.1 mrg elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900;
940 1.1 mrg else if (strcmp (argv[i], "-march=gfx906") == 0)
941 1.1 mrg elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906;
942 1.1 mrg else if (strcmp (argv[i], "-march=gfx908") == 0)
943 1.1 mrg elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908;
944 1.1 mrg }
945 1.1 mrg
946 1.1 mrg if (!(fopenacc ^ fopenmp))
947 1.1 mrg fatal_error (input_location, "either -fopenacc or -fopenmp must be set");
948 1.1 mrg
949 1.1 mrg if (!sram_seen)
950 1.1 mrg {
951 1.1 mrg #ifdef HAVE_GCN_ASM_V3_SYNTAX
952 1.1 mrg /* For HSACOv3, the SRAM-ECC feature defaults to "on" on GPUs where the
953 1.1 mrg feature is available.
954 1.1 mrg (HSACOv4 has elf_flags initialsed to "any" in all cases.) */
955 1.1 mrg switch (elf_arch)
956 1.1 mrg {
957 1.1 mrg case EF_AMDGPU_MACH_AMDGCN_GFX803:
958 1.1 mrg case EF_AMDGPU_MACH_AMDGCN_GFX900:
959 1.1 mrg case EF_AMDGPU_MACH_AMDGCN_GFX906:
960 1.1 mrg #ifndef HAVE_GCN_SRAM_ECC_GFX908
961 1.1 mrg case EF_AMDGPU_MACH_AMDGCN_GFX908:
962 1.1 mrg #endif
963 1.1 mrg break;
964 1.1 mrg default:
965 1.1 mrg SET_SRAM_ECC_ON (elf_flags);
966 1.1 mrg break;
967 1.1 mrg }
968 1.1 mrg #endif
969 1.1 mrg }
970 1.1 mrg
971 1.1 mrg const char *abi;
972 1.1 mrg switch (offload_abi)
973 1.1 mrg {
974 1.1 mrg case OFFLOAD_ABI_LP64:
975 1.1 mrg abi = "-m64";
976 1.1 mrg break;
977 1.1 mrg case OFFLOAD_ABI_ILP32:
978 1.1 mrg abi = "-m32";
979 1.1 mrg break;
980 1.1 mrg default:
981 1.1 mrg gcc_unreachable ();
982 1.1 mrg }
983 1.1 mrg
984 1.1 mrg /* Build arguments for compiler pass. */
985 1.1 mrg struct obstack cc_argv_obstack;
986 1.1 mrg obstack_init (&cc_argv_obstack);
987 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, driver);
988 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-S");
989 1.1 mrg
990 1.1 mrg if (save_temps)
991 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-save-temps");
992 1.1 mrg if (verbose)
993 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-v");
994 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, abi);
995 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-xlto");
996 1.1 mrg if (fopenmp)
997 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-mgomp");
998 1.1 mrg
999 1.1 mrg for (int ix = 1; ix != argc; ix++)
1000 1.1 mrg {
1001 1.1 mrg if (!strcmp (argv[ix], "-o") && ix + 1 != argc)
1002 1.1 mrg outname = argv[++ix];
1003 1.1 mrg else
1004 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, argv[ix]);
1005 1.1 mrg }
1006 1.1 mrg
1007 1.1 mrg if (!dumppfx)
1008 1.1 mrg dumppfx = outname;
1009 1.1 mrg
1010 1.1 mrg gcn_dumpbase = concat (dumppfx, ".c", NULL);
1011 1.1 mrg
1012 1.1 mrg const char *gcn_cfile_name;
1013 1.1 mrg if (save_temps)
1014 1.1 mrg gcn_cfile_name = gcn_dumpbase;
1015 1.1 mrg else
1016 1.1 mrg gcn_cfile_name = make_temp_file (".c");
1017 1.1 mrg obstack_ptr_grow (&files_to_cleanup, gcn_cfile_name);
1018 1.1 mrg
1019 1.1 mrg cfile = fopen (gcn_cfile_name, "w");
1020 1.1 mrg if (!cfile)
1021 1.1 mrg fatal_error (input_location, "cannot open '%s'", gcn_cfile_name);
1022 1.1 mrg
1023 1.1 mrg /* Currently, we only support offloading in 64-bit configurations. */
1024 1.1 mrg if (offload_abi == OFFLOAD_ABI_LP64)
1025 1.1 mrg {
1026 1.1 mrg const char *mko_dumpbase = concat (dumppfx, ".mkoffload", NULL);
1027 1.1 mrg const char *hsaco_dumpbase = concat (dumppfx, ".mkoffload.hsaco", NULL);
1028 1.1 mrg
1029 1.1 mrg const char *gcn_s1_name;
1030 1.1 mrg const char *gcn_s2_name;
1031 1.1 mrg const char *gcn_o_name;
1032 1.1 mrg if (save_temps)
1033 1.1 mrg {
1034 1.1 mrg gcn_s1_name = concat (mko_dumpbase, ".1.s", NULL);
1035 1.1 mrg gcn_s2_name = concat (mko_dumpbase, ".2.s", NULL);
1036 1.1 mrg gcn_o_name = hsaco_dumpbase;
1037 1.1 mrg }
1038 1.1 mrg else
1039 1.1 mrg {
1040 1.1 mrg gcn_s1_name = make_temp_file (".mkoffload.1.s");
1041 1.1 mrg gcn_s2_name = make_temp_file (".mkoffload.2.s");
1042 1.1 mrg gcn_o_name = make_temp_file (".mkoffload.hsaco");
1043 1.1 mrg }
1044 1.1 mrg obstack_ptr_grow (&files_to_cleanup, gcn_s1_name);
1045 1.1 mrg obstack_ptr_grow (&files_to_cleanup, gcn_s2_name);
1046 1.1 mrg obstack_ptr_grow (&files_to_cleanup, gcn_o_name);
1047 1.1 mrg
1048 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpdir");
1049 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "");
1050 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpbase");
1051 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, mko_dumpbase);
1052 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpbase-ext");
1053 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "");
1054 1.1 mrg
1055 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-o");
1056 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, gcn_s1_name);
1057 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, NULL);
1058 1.1 mrg const char **cc_argv = XOBFINISH (&cc_argv_obstack, const char **);
1059 1.1 mrg
1060 1.1 mrg /* Build arguments for assemble/link pass. */
1061 1.1 mrg struct obstack ld_argv_obstack;
1062 1.1 mrg obstack_init (&ld_argv_obstack);
1063 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, driver);
1064 1.1 mrg
1065 1.1 mrg /* Extract early-debug information from the input objects.
1066 1.1 mrg This loop finds all the inputs that end ".o" and aren't the output. */
1067 1.1 mrg int dbgcount = 0;
1068 1.1 mrg for (int ix = 1; ix != argc; ix++)
1069 1.1 mrg {
1070 1.1 mrg if (!strcmp (argv[ix], "-o") && ix + 1 != argc)
1071 1.1 mrg ++ix;
1072 1.1 mrg else
1073 1.1 mrg {
1074 1.1 mrg if (strcmp (argv[ix] + strlen(argv[ix]) - 2, ".o") == 0)
1075 1.1 mrg {
1076 1.1 mrg char *dbgobj;
1077 1.1 mrg if (save_temps)
1078 1.1 mrg {
1079 1.1 mrg char buf[10];
1080 1.1 mrg sprintf (buf, "%d", dbgcount++);
1081 1.1 mrg dbgobj = concat (dumppfx, ".mkoffload.dbg", buf, ".o", NULL);
1082 1.1 mrg }
1083 1.1 mrg else
1084 1.1 mrg dbgobj = make_temp_file (".mkoffload.dbg.o");
1085 1.1 mrg obstack_ptr_grow (&files_to_cleanup, dbgobj);
1086 1.1 mrg
1087 1.1 mrg /* If the copy fails then just ignore it. */
1088 1.1 mrg if (copy_early_debug_info (argv[ix], dbgobj))
1089 1.1 mrg {
1090 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, dbgobj);
1091 1.1 mrg obstack_ptr_grow (&files_to_cleanup, dbgobj);
1092 1.1 mrg }
1093 1.1 mrg else
1094 1.1 mrg free (dbgobj);
1095 1.1 mrg }
1096 1.1 mrg }
1097 1.1 mrg }
1098 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
1099 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
1100 1.1 mrg obstack_ptr_grow (&ld_argv_obstack,
1101 1.1 mrg (TEST_XNACK (elf_flags)
1102 1.1 mrg ? "-mxnack" : "-mno-xnack"));
1103 1.1 mrg obstack_ptr_grow (&ld_argv_obstack,
1104 1.1 mrg (TEST_SRAM_ECC_ON (elf_flags) ? "-msram-ecc=on"
1105 1.1 mrg : TEST_SRAM_ECC_ANY (elf_flags) ? "-msram-ecc=any"
1106 1.1 mrg : "-msram-ecc=off"));
1107 1.1 mrg if (verbose)
1108 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, "-v");
1109 1.1 mrg
1110 1.1 mrg for (int i = 1; i < argc; i++)
1111 1.1 mrg if (startswith (argv[i], "-l")
1112 1.1 mrg || startswith (argv[i], "-Wl")
1113 1.1 mrg || startswith (argv[i], "-march"))
1114 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, argv[i]);
1115 1.1 mrg
1116 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpdir");
1117 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "");
1118 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpbase");
1119 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, hsaco_dumpbase);
1120 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "-dumpbase-ext");
1121 1.1 mrg obstack_ptr_grow (&cc_argv_obstack, "");
1122 1.1 mrg
1123 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, "-o");
1124 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, gcn_o_name);
1125 1.1 mrg obstack_ptr_grow (&ld_argv_obstack, NULL);
1126 1.1 mrg const char **ld_argv = XOBFINISH (&ld_argv_obstack, const char **);
1127 1.1 mrg
1128 1.1 mrg /* Clean up unhelpful environment variables. */
1129 1.1 mrg char *execpath = getenv ("GCC_EXEC_PREFIX");
1130 1.1 mrg char *cpath = getenv ("COMPILER_PATH");
1131 1.1 mrg char *lpath = getenv ("LIBRARY_PATH");
1132 1.1 mrg unsetenv ("GCC_EXEC_PREFIX");
1133 1.1 mrg unsetenv ("COMPILER_PATH");
1134 1.1 mrg unsetenv ("LIBRARY_PATH");
1135 1.1 mrg
1136 1.1 mrg /* Run the compiler pass. */
1137 1.1 mrg fork_execute (cc_argv[0], CONST_CAST (char **, cc_argv), true, ".gcc_args");
1138 1.1 mrg obstack_free (&cc_argv_obstack, NULL);
1139 1.1 mrg
1140 1.1 mrg in = fopen (gcn_s1_name, "r");
1141 1.1 mrg if (!in)
1142 1.1 mrg fatal_error (input_location, "cannot open intermediate gcn asm file");
1143 1.1 mrg
1144 1.1 mrg out = fopen (gcn_s2_name, "w");
1145 1.1 mrg if (!out)
1146 1.1 mrg fatal_error (input_location, "cannot open '%s'", gcn_s2_name);
1147 1.1 mrg
1148 1.1 mrg process_asm (in, out, cfile);
1149 1.1 mrg
1150 1.1 mrg fclose (in);
1151 1.1 mrg fclose (out);
1152 1.1 mrg
1153 1.1 mrg /* Run the assemble/link pass. */
1154 1.1 mrg fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true, ".ld_args");
1155 1.1 mrg obstack_free (&ld_argv_obstack, NULL);
1156 1.1 mrg
1157 1.1 mrg in = fopen (gcn_o_name, "r");
1158 1.1 mrg if (!in)
1159 1.1 mrg fatal_error (input_location, "cannot open intermediate gcn obj file");
1160 1.1 mrg
1161 1.1 mrg process_obj (in, cfile);
1162 1.1 mrg
1163 1.1 mrg fclose (in);
1164 1.1 mrg
1165 1.1 mrg xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
1166 1.1 mrg xputenv (concat ("COMPILER_PATH=", cpath, NULL));
1167 1.1 mrg xputenv (concat ("LIBRARY_PATH=", lpath, NULL));
1168 1.1 mrg }
1169 1.1 mrg
1170 1.1 mrg fclose (cfile);
1171 1.1 mrg
1172 1.1 mrg compile_native (gcn_cfile_name, outname, collect_gcc, fPIC, fpic);
1173 1.1 mrg
1174 1.1 mrg return 0;
1175 1.1 mrg }
1176