1/*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <unistd.h>
29#include "gen_device_info.h"
30#include "compiler/shader_enums.h"
31#include "util/bitscan.h"
32#include "util/macros.h"
33
34#include "drm-uapi/i915_drm.h"
35
/**
 * Get the PCI ID for the device name.
 *
 * The name is matched exactly (case-sensitively) against the short platform
 * mnemonics listed in the table below.
 *
 * Returns -1 if the name is NULL or the device is not known.
 */
int
gen_device_name_to_pci_device_id(const char *name)
{
   static const struct {
      const char *name;
      int pci_id;
   } name_map[] = {
      { "brw", 0x2a02 },
      { "g4x", 0x2a42 },
      { "ilk", 0x0042 },
      { "snb", 0x0126 },
      { "ivb", 0x016a },
      { "hsw", 0x0d2e },
      { "byt", 0x0f33 },
      { "bdw", 0x162e },
      { "chv", 0x22B3 },
      { "skl", 0x1912 },
      { "bxt", 0x5A85 },
      { "kbl", 0x5912 },
      { "aml", 0x591C },
      { "glk", 0x3185 },
      { "cfl", 0x3E9B },
      { "whl", 0x3EA1 },
      { "cml", 0x9b41 },
      { "cnl", 0x5a52 },
      { "icl", 0x8a52 },
   };

   /* Passing NULL to strcmp() is undefined behaviour; report it as an
    * unknown device instead.
    */
   if (name == NULL)
      return -1;

   for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {
      if (strcmp(name_map[i].name, name) == 0)
         return name_map[i].pci_id;
   }

   return -1;
}
76
/**
 * Get the overridden PCI ID for the device. This is set with the
 * INTEL_DEVID_OVERRIDE environment variable, which holds either a platform
 * name understood by gen_device_name_to_pci_device_id() or a number in any
 * base strtol() auto-detects (decimal, 0x-prefixed hex, 0-prefixed octal).
 *
 * The environment is not consulted when issetugid() returns non-zero.
 *
 * Returns -1 if the override is not set, or if its value is neither a known
 * name nor a complete number that fits a 16-bit PCI device ID.
 */
int
gen_get_pci_device_id_override(void)
{
   if (issetugid())
      return -1;

   const char *devid_override = getenv("INTEL_DEVID_OVERRIDE");
   if (devid_override == NULL)
      return -1;

   const int id = gen_device_name_to_pci_device_id(devid_override);
   if (id >= 0)
      return id;

   /* Not a known name: parse it as a number.  Reject input with no digits
    * or trailing garbage rather than silently returning 0.
    */
   char *end = NULL;
   const long value = strtol(devid_override, &end, 0);
   if (end == devid_override || *end != '\0')
      return -1;

   /* PCI device IDs are 16 bits wide.  The range check also catches the
    * LONG_MIN/LONG_MAX values strtol() returns on overflow and avoids a
    * lossy long -> int conversion.
    */
   if (value < 0 || value > 0xffff)
      return -1;

   return (int)value;
}
96
97static const struct gen_device_info gen_device_info_i965 = {
98   .gen = 4,
99   .has_negative_rhw_bug = true,
100   .num_slices = 1,
101   .num_subslices = { 1, },
102   .num_eu_per_subslice = 8,
103   .num_thread_per_eu = 4,
104   .max_vs_threads = 16,
105   .max_gs_threads = 2,
106   .max_wm_threads = 8 * 4,
107   .urb = {
108      .size = 256,
109   },
110   .timestamp_frequency = 12500000,
111   .simulator_id = -1,
112};
113
114static const struct gen_device_info gen_device_info_g4x = {
115   .gen = 4,
116   .has_pln = true,
117   .has_compr4 = true,
118   .has_surface_tile_offset = true,
119   .is_g4x = true,
120   .num_slices = 1,
121   .num_subslices = { 1, },
122   .num_eu_per_subslice = 10,
123   .num_thread_per_eu = 5,
124   .max_vs_threads = 32,
125   .max_gs_threads = 2,
126   .max_wm_threads = 10 * 5,
127   .urb = {
128      .size = 384,
129   },
130   .timestamp_frequency = 12500000,
131   .simulator_id = -1,
132};
133
134static const struct gen_device_info gen_device_info_ilk = {
135   .gen = 5,
136   .has_pln = true,
137   .has_compr4 = true,
138   .has_surface_tile_offset = true,
139   .num_slices = 1,
140   .num_subslices = { 1, },
141   .num_eu_per_subslice = 12,
142   .num_thread_per_eu = 6,
143   .max_vs_threads = 72,
144   .max_gs_threads = 32,
145   .max_wm_threads = 12 * 6,
146   .urb = {
147      .size = 1024,
148   },
149   .timestamp_frequency = 12500000,
150   .simulator_id = -1,
151};
152
153static const struct gen_device_info gen_device_info_snb_gt1 = {
154   .gen = 6,
155   .gt = 1,
156   .has_hiz_and_separate_stencil = true,
157   .has_llc = true,
158   .has_pln = true,
159   .has_surface_tile_offset = true,
160   .needs_unlit_centroid_workaround = true,
161   .num_slices = 1,
162   .num_subslices = { 1, },
163   .num_eu_per_subslice = 6,
164   .num_thread_per_eu = 6, /* Not confirmed */
165   .max_vs_threads = 24,
166   .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
167   .max_wm_threads = 40,
168   .urb = {
169      .size = 32,
170      .min_entries = {
171         [MESA_SHADER_VERTEX]   = 24,
172      },
173      .max_entries = {
174         [MESA_SHADER_VERTEX]   = 256,
175         [MESA_SHADER_GEOMETRY] = 256,
176      },
177   },
178   .timestamp_frequency = 12500000,
179   .simulator_id = -1,
180};
181
182static const struct gen_device_info gen_device_info_snb_gt2 = {
183   .gen = 6,
184   .gt = 2,
185   .has_hiz_and_separate_stencil = true,
186   .has_llc = true,
187   .has_pln = true,
188   .has_surface_tile_offset = true,
189   .needs_unlit_centroid_workaround = true,
190   .num_slices = 1,
191   .num_subslices = { 1, },
192   .num_eu_per_subslice = 12,
193   .num_thread_per_eu = 6, /* Not confirmed */
194   .max_vs_threads = 60,
195   .max_gs_threads = 60,
196   .max_wm_threads = 80,
197   .urb = {
198      .size = 64,
199      .min_entries = {
200         [MESA_SHADER_VERTEX]   = 24,
201      },
202      .max_entries = {
203         [MESA_SHADER_VERTEX]   = 256,
204         [MESA_SHADER_GEOMETRY] = 256,
205      },
206   },
207   .timestamp_frequency = 12500000,
208   .simulator_id = -1,
209};
210
/* Designated-initializer fields shared by the Gen7 entries.  Entries list
 * this macro first so that later designators (e.g. .has_llc on Baytrail)
 * override these defaults.
 */
#define GEN7_FEATURES                     \
   .gen = 7,                              \
   .timestamp_frequency = 12500000,       \
   .has_llc = true,                       \
   .has_pln = true,                       \
   .has_64bit_types = true,               \
   .has_surface_tile_offset = true,       \
   .has_hiz_and_separate_stencil = true,  \
   .must_use_separate_stencil = true
220
221static const struct gen_device_info gen_device_info_ivb_gt1 = {
222   GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
223   .num_slices = 1,
224   .num_subslices = { 1, },
225   .num_eu_per_subslice = 6,
226   .num_thread_per_eu = 6,
227   .l3_banks = 2,
228   .max_vs_threads = 36,
229   .max_tcs_threads = 36,
230   .max_tes_threads = 36,
231   .max_gs_threads = 36,
232   .max_wm_threads = 48,
233   .max_cs_threads = 36,
234   .urb = {
235      .size = 128,
236      .min_entries = {
237         [MESA_SHADER_VERTEX]    = 32,
238         [MESA_SHADER_TESS_EVAL] = 10,
239      },
240      .max_entries = {
241         [MESA_SHADER_VERTEX]    = 512,
242         [MESA_SHADER_TESS_CTRL] = 32,
243         [MESA_SHADER_TESS_EVAL] = 288,
244         [MESA_SHADER_GEOMETRY]  = 192,
245      },
246   },
247   .simulator_id = 7,
248};
249
250static const struct gen_device_info gen_device_info_ivb_gt2 = {
251   GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
252   .num_slices = 1,
253   .num_subslices = { 1, },
254   .num_eu_per_subslice = 12,
255   .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
256                            * @max_wm_threads ... */
257   .l3_banks = 4,
258   .max_vs_threads = 128,
259   .max_tcs_threads = 128,
260   .max_tes_threads = 128,
261   .max_gs_threads = 128,
262   .max_wm_threads = 172,
263   .max_cs_threads = 64,
264   .urb = {
265      .size = 256,
266      .min_entries = {
267         [MESA_SHADER_VERTEX]    = 32,
268         [MESA_SHADER_TESS_EVAL] = 10,
269      },
270      .max_entries = {
271         [MESA_SHADER_VERTEX]    = 704,
272         [MESA_SHADER_TESS_CTRL] = 64,
273         [MESA_SHADER_TESS_EVAL] = 448,
274         [MESA_SHADER_GEOMETRY]  = 320,
275      },
276   },
277   .simulator_id = 7,
278};
279
280static const struct gen_device_info gen_device_info_byt = {
281   GEN7_FEATURES, .is_baytrail = true, .gt = 1,
282   .num_slices = 1,
283   .num_subslices = { 1, },
284   .num_eu_per_subslice = 4,
285   .num_thread_per_eu = 8,
286   .l3_banks = 1,
287   .has_llc = false,
288   .max_vs_threads = 36,
289   .max_tcs_threads = 36,
290   .max_tes_threads = 36,
291   .max_gs_threads = 36,
292   .max_wm_threads = 48,
293   .max_cs_threads = 32,
294   .urb = {
295      .size = 128,
296      .min_entries = {
297         [MESA_SHADER_VERTEX]    = 32,
298         [MESA_SHADER_TESS_EVAL] = 10,
299      },
300      .max_entries = {
301         [MESA_SHADER_VERTEX]    = 512,
302         [MESA_SHADER_TESS_CTRL] = 32,
303         [MESA_SHADER_TESS_EVAL] = 288,
304         [MESA_SHADER_GEOMETRY]  = 192,
305      },
306   },
307   .simulator_id = 10,
308};
309
/* Haswell initializer fields: everything in GEN7_FEATURES plus the
 * Haswell-only flags.
 */
#define HSW_FEATURES                \
   GEN7_FEATURES,                   \
   .is_haswell = true,              \
   .has_resource_streamer = true,   \
   .supports_simd16_3src = true
315
316static const struct gen_device_info gen_device_info_hsw_gt1 = {
317   HSW_FEATURES, .gt = 1,
318   .num_slices = 1,
319   .num_subslices = { 1, },
320   .num_eu_per_subslice = 10,
321   .num_thread_per_eu = 7,
322   .l3_banks = 2,
323   .max_vs_threads = 70,
324   .max_tcs_threads = 70,
325   .max_tes_threads = 70,
326   .max_gs_threads = 70,
327   .max_wm_threads = 102,
328   .max_cs_threads = 70,
329   .urb = {
330      .size = 128,
331      .min_entries = {
332         [MESA_SHADER_VERTEX]    = 32,
333         [MESA_SHADER_TESS_EVAL] = 10,
334      },
335      .max_entries = {
336         [MESA_SHADER_VERTEX]    = 640,
337         [MESA_SHADER_TESS_CTRL] = 64,
338         [MESA_SHADER_TESS_EVAL] = 384,
339         [MESA_SHADER_GEOMETRY]  = 256,
340      },
341   },
342   .simulator_id = 9,
343};
344
345static const struct gen_device_info gen_device_info_hsw_gt2 = {
346   HSW_FEATURES, .gt = 2,
347   .num_slices = 1,
348   .num_subslices = { 2, },
349   .num_eu_per_subslice = 10,
350   .num_thread_per_eu = 7,
351   .l3_banks = 4,
352   .max_vs_threads = 280,
353   .max_tcs_threads = 256,
354   .max_tes_threads = 280,
355   .max_gs_threads = 256,
356   .max_wm_threads = 204,
357   .max_cs_threads = 70,
358   .urb = {
359      .size = 256,
360      .min_entries = {
361         [MESA_SHADER_VERTEX]    = 64,
362         [MESA_SHADER_TESS_EVAL] = 10,
363      },
364      .max_entries = {
365         [MESA_SHADER_VERTEX]    = 1664,
366         [MESA_SHADER_TESS_CTRL] = 128,
367         [MESA_SHADER_TESS_EVAL] = 960,
368         [MESA_SHADER_GEOMETRY]  = 640,
369      },
370   },
371   .simulator_id = 9,
372};
373
374static const struct gen_device_info gen_device_info_hsw_gt3 = {
375   HSW_FEATURES, .gt = 3,
376   .num_slices = 2,
377   .num_subslices = { 2, },
378   .num_eu_per_subslice = 10,
379   .num_thread_per_eu = 7,
380   .l3_banks = 8,
381   .max_vs_threads = 280,
382   .max_tcs_threads = 256,
383   .max_tes_threads = 280,
384   .max_gs_threads = 256,
385   .max_wm_threads = 408,
386   .max_cs_threads = 70,
387   .urb = {
388      .size = 512,
389      .min_entries = {
390         [MESA_SHADER_VERTEX]    = 64,
391         [MESA_SHADER_TESS_EVAL] = 10,
392      },
393      .max_entries = {
394         [MESA_SHADER_VERTEX]    = 1664,
395         [MESA_SHADER_TESS_CTRL] = 128,
396         [MESA_SHADER_TESS_EVAL] = 960,
397         [MESA_SHADER_GEOMETRY]  = 640,
398      },
399   },
400   .simulator_id = 9,
401};
402
/* Designated-initializer defaults shared by Gen8 and reused as the base of
 * the later generations' feature macros.
 *
 * has_sample_with_hiz stays false here: it is unclear how well sampling
 * from the HiZ buffer is supported on GEN8, so be conservative.
 */
#define GEN8_FEATURES                     \
   .gen = 8,                              \
   .timestamp_frequency = 12500000,       \
   .has_llc = true,                       \
   .has_pln = true,                       \
   .has_64bit_types = true,               \
   .has_integer_dword_mul = true,         \
   .has_resource_streamer = true,         \
   .has_surface_tile_offset = true,       \
   .has_hiz_and_separate_stencil = true,  \
   .must_use_separate_stencil = true,     \
   .has_sample_with_hiz = false,          \
   .supports_simd16_3src = true,          \
   .num_thread_per_eu = 7,                \
   .max_vs_threads = 504,                 \
   .max_tcs_threads = 504,                \
   .max_tes_threads = 504,                \
   .max_gs_threads = 504,                 \
   .max_wm_threads = 384
425
426static const struct gen_device_info gen_device_info_bdw_gt1 = {
427   GEN8_FEATURES, .gt = 1,
428   .is_broadwell = true,
429   .num_slices = 1,
430   .num_subslices = { 2, },
431   .num_eu_per_subslice = 8,
432   .l3_banks = 2,
433   .max_cs_threads = 42,
434   .urb = {
435      .size = 192,
436      .min_entries = {
437         [MESA_SHADER_VERTEX]    = 64,
438         [MESA_SHADER_TESS_EVAL] = 34,
439      },
440      .max_entries = {
441         [MESA_SHADER_VERTEX]    = 2560,
442         [MESA_SHADER_TESS_CTRL] = 504,
443         [MESA_SHADER_TESS_EVAL] = 1536,
444         [MESA_SHADER_GEOMETRY]  = 960,
445      },
446   },
447   .simulator_id = 11,
448};
449
450static const struct gen_device_info gen_device_info_bdw_gt2 = {
451   GEN8_FEATURES, .gt = 2,
452   .is_broadwell = true,
453   .num_slices = 1,
454   .num_subslices = { 3, },
455   .num_eu_per_subslice = 8,
456   .l3_banks = 4,
457   .max_cs_threads = 56,
458   .urb = {
459      .size = 384,
460      .min_entries = {
461         [MESA_SHADER_VERTEX]    = 64,
462         [MESA_SHADER_TESS_EVAL] = 34,
463      },
464      .max_entries = {
465         [MESA_SHADER_VERTEX]    = 2560,
466         [MESA_SHADER_TESS_CTRL] = 504,
467         [MESA_SHADER_TESS_EVAL] = 1536,
468         [MESA_SHADER_GEOMETRY]  = 960,
469      },
470   },
471   .simulator_id = 11,
472};
473
474static const struct gen_device_info gen_device_info_bdw_gt3 = {
475   GEN8_FEATURES, .gt = 3,
476   .is_broadwell = true,
477   .num_slices = 2,
478   .num_subslices = { 3, 3, },
479   .num_eu_per_subslice = 8,
480   .l3_banks = 8,
481   .max_cs_threads = 56,
482   .urb = {
483      .size = 384,
484      .min_entries = {
485         [MESA_SHADER_VERTEX]    = 64,
486         [MESA_SHADER_TESS_EVAL] = 34,
487      },
488      .max_entries = {
489         [MESA_SHADER_VERTEX]    = 2560,
490         [MESA_SHADER_TESS_CTRL] = 504,
491         [MESA_SHADER_TESS_EVAL] = 1536,
492         [MESA_SHADER_GEOMETRY]  = 960,
493      },
494   },
495   .simulator_id = 11,
496};
497
498static const struct gen_device_info gen_device_info_chv = {
499   GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
500   .has_llc = false,
501   .has_integer_dword_mul = false,
502   .num_slices = 1,
503   .num_subslices = { 2, },
504   .num_eu_per_subslice = 8,
505   .l3_banks = 2,
506   .max_vs_threads = 80,
507   .max_tcs_threads = 80,
508   .max_tes_threads = 80,
509   .max_gs_threads = 80,
510   .max_wm_threads = 128,
511   .max_cs_threads = 6 * 7,
512   .urb = {
513      .size = 192,
514      .min_entries = {
515         [MESA_SHADER_VERTEX]    = 34,
516         [MESA_SHADER_TESS_EVAL] = 34,
517      },
518      .max_entries = {
519         [MESA_SHADER_VERTEX]    = 640,
520         [MESA_SHADER_TESS_CTRL] = 80,
521         [MESA_SHADER_TESS_EVAL] = 384,
522         [MESA_SHADER_GEOMETRY]  = 256,
523      },
524   },
525   .simulator_id = 13,
526};
527
/* Gen9 hardware limits: thread counts, timestamp frequency and the
 * default URB configuration.  Used after GEN8_FEATURES so that these
 * values replace the Gen8 ones.
 */
#define GEN9_HW_INFO                         \
   .gen = 9,                                 \
   .timestamp_frequency = 12000000,          \
   .max_vs_threads = 336,                    \
   .max_tcs_threads = 336,                   \
   .max_tes_threads = 336,                   \
   .max_gs_threads = 336,                    \
   .max_cs_threads = 56,                     \
   .urb = {                                  \
      .size = 384,                           \
      .min_entries = {                       \
         [MESA_SHADER_VERTEX]    = 64,       \
         [MESA_SHADER_TESS_EVAL] = 34,       \
      },                                     \
      .max_entries = {                       \
         [MESA_SHADER_VERTEX]    = 1856,     \
         [MESA_SHADER_TESS_CTRL] = 672,      \
         [MESA_SHADER_TESS_EVAL] = 1120,     \
         [MESA_SHADER_GEOMETRY]  = 640,      \
      },                                     \
   }
549
/* Common fields for the Gen9 "LP" entries.  Starts from GEN8_FEATURES and
 * GEN9_HW_INFO, then overrides the feature flags, thread limits, timestamp
 * frequency and the whole URB description.
 */
#define GEN9_LP_FEATURES                     \
   GEN8_FEATURES,                            \
   GEN9_HW_INFO,                             \
   .gt = 1,                                  \
   .timestamp_frequency = 19200000,          \
   .has_llc = false,                         \
   .has_integer_dword_mul = false,           \
   .has_sample_with_hiz = true,              \
   .num_slices = 1,                          \
   .num_thread_per_eu = 6,                   \
   .max_vs_threads = 112,                    \
   .max_tcs_threads = 112,                   \
   .max_tes_threads = 112,                   \
   .max_gs_threads = 112,                    \
   .max_cs_threads = 6 * 6,                  \
   .urb = {                                  \
      .size = 192,                           \
      .min_entries = {                       \
         [MESA_SHADER_VERTEX]    = 34,       \
         [MESA_SHADER_TESS_EVAL] = 34,       \
      },                                     \
      .max_entries = {                       \
         [MESA_SHADER_VERTEX]    = 704,      \
         [MESA_SHADER_TESS_CTRL] = 256,      \
         [MESA_SHADER_TESS_EVAL] = 416,      \
         [MESA_SHADER_GEOMETRY]  = 256,      \
      },                                     \
   }
578
/* Gen9 LP variant with 3 subslices of 6 EUs each. */
#define GEN9_LP_FEATURES_3X6        \
   GEN9_LP_FEATURES,                \
   .num_eu_per_subslice = 6,        \
   .num_subslices = { 3 }
583
/* Gen9 LP variant with 2 subslices of 6 EUs each.  Relative to
 * GEN9_LP_FEATURES it lowers the thread limits and replaces the URB
 * description.
 */
#define GEN9_LP_FEATURES_2X6                 \
   GEN9_LP_FEATURES,                         \
   .num_eu_per_subslice = 6,                 \
   .num_subslices = { 2 },                   \
   .max_vs_threads = 56,                     \
   .max_tcs_threads = 56,                    \
   .max_tes_threads = 56,                    \
   .max_gs_threads = 56,                     \
   .max_cs_threads = 6 * 6,                  \
   .urb = {                                  \
      .size = 128,                           \
      .min_entries = {                       \
         [MESA_SHADER_VERTEX]    = 34,       \
         [MESA_SHADER_TESS_EVAL] = 34,       \
      },                                     \
      .max_entries = {                       \
         [MESA_SHADER_VERTEX]    = 352,      \
         [MESA_SHADER_TESS_CTRL] = 128,      \
         [MESA_SHADER_TESS_EVAL] = 208,      \
         [MESA_SHADER_GEOMETRY]  = 128,      \
      },                                     \
   }
606
/* Base fields for the non-LP Gen9 entries: Gen8 feature flags, Gen9
 * hardware limits, and sampling with HiZ turned on.
 */
#define GEN9_FEATURES               \
   GEN8_FEATURES,                   \
   GEN9_HW_INFO,                    \
   .has_sample_with_hiz = true
611
612static const struct gen_device_info gen_device_info_skl_gt1 = {
613   GEN9_FEATURES, .gt = 1,
614   .is_skylake = true,
615   .num_slices = 1,
616   .num_subslices = { 2, },
617   .num_eu_per_subslice = 6,
618   .l3_banks = 2,
619   .urb.size = 192,
620   /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
621    * leading to some vertices to go missing if we use too much URB.
622    */
623   .urb.max_entries[MESA_SHADER_VERTEX] = 928,
624   .simulator_id = 12,
625};
626
627static const struct gen_device_info gen_device_info_skl_gt2 = {
628   GEN9_FEATURES, .gt = 2,
629   .is_skylake = true,
630   .num_slices = 1,
631   .num_subslices = { 3, },
632   .num_eu_per_subslice = 8,
633   .l3_banks = 4,
634   .simulator_id = 12,
635};
636
637static const struct gen_device_info gen_device_info_skl_gt3 = {
638   GEN9_FEATURES, .gt = 3,
639   .is_skylake = true,
640   .num_slices = 2,
641   .num_subslices = { 3, 3, },
642   .num_eu_per_subslice = 8,
643   .l3_banks = 8,
644   .simulator_id = 12,
645};
646
647static const struct gen_device_info gen_device_info_skl_gt4 = {
648   GEN9_FEATURES, .gt = 4,
649   .is_skylake = true,
650   .num_slices = 3,
651   .num_subslices = { 3, 3, 3, },
652   .num_eu_per_subslice = 8,
653   .l3_banks = 12,
654   /* From the "L3 Allocation and Programming" documentation:
655    *
656    * "URB is limited to 1008KB due to programming restrictions.  This is not a
657    * restriction of the L3 implementation, but of the FF and other clients.
658    * Therefore, in a GT4 implementation it is possible for the programmed
659    * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
660    * only 1008KB of this will be used."
661    */
662   .urb.size = 1008 / 3,
663   .simulator_id = 12,
664};
665
666static const struct gen_device_info gen_device_info_bxt = {
667   GEN9_LP_FEATURES_3X6,
668   .is_broxton = true,
669   .l3_banks = 2,
670   .simulator_id = 14,
671};
672
673static const struct gen_device_info gen_device_info_bxt_2x6 = {
674   GEN9_LP_FEATURES_2X6,
675   .is_broxton = true,
676   .l3_banks = 1,
677   .simulator_id = 14,
678};
679/*
680 * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
681 * There's no KBL entry. Using the default SKL (GEN9) GS entries value.
682 */
683
684static const struct gen_device_info gen_device_info_kbl_gt1 = {
685   GEN9_FEATURES,
686   .is_kabylake = true,
687   .gt = 1,
688
689   .max_cs_threads = 7 * 6,
690   .urb.size = 192,
691   .num_slices = 1,
692   .num_subslices = { 2, },
693   .num_eu_per_subslice = 6,
694   .l3_banks = 2,
695   /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
696    * leading to some vertices to go missing if we use too much URB.
697    */
698   .urb.max_entries[MESA_SHADER_VERTEX] = 928,
699   .simulator_id = 16,
700};
701
702static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
703   GEN9_FEATURES,
704   .is_kabylake = true,
705   .gt = 1,
706
707   .max_cs_threads = 7 * 6,
708   .num_slices = 1,
709   .num_subslices = { 3, },
710   .num_eu_per_subslice = 6,
711   .l3_banks = 4,
712   .simulator_id = 16,
713};
714
715static const struct gen_device_info gen_device_info_kbl_gt2 = {
716   GEN9_FEATURES,
717   .is_kabylake = true,
718   .gt = 2,
719
720   .num_slices = 1,
721   .num_subslices = { 3, },
722   .num_eu_per_subslice = 8,
723   .l3_banks = 4,
724   .simulator_id = 16,
725};
726
727static const struct gen_device_info gen_device_info_kbl_gt3 = {
728   GEN9_FEATURES,
729   .is_kabylake = true,
730   .gt = 3,
731
732   .num_slices = 2,
733   .num_subslices = { 3, 3, },
734   .num_eu_per_subslice = 8,
735   .l3_banks = 8,
736   .simulator_id = 16,
737};
738
739static const struct gen_device_info gen_device_info_kbl_gt4 = {
740   GEN9_FEATURES,
741   .is_kabylake = true,
742   .gt = 4,
743
744   /*
745    * From the "L3 Allocation and Programming" documentation:
746    *
747    * "URB is limited to 1008KB due to programming restrictions.  This
748    *  is not a restriction of the L3 implementation, but of the FF and
749    *  other clients.  Therefore, in a GT4 implementation it is
750    *  possible for the programmed allocation of the L3 data array to
751    *  provide 3*384KB=1152KB for URB, but only 1008KB of this
752    *  will be used."
753    */
754   .urb.size = 1008 / 3,
755   .num_slices = 3,
756   .num_subslices = { 3, 3, 3, },
757   .num_eu_per_subslice = 8,
758   .l3_banks = 12,
759   .simulator_id = 16,
760};
761
762static const struct gen_device_info gen_device_info_glk = {
763   GEN9_LP_FEATURES_3X6,
764   .is_geminilake = true,
765   .l3_banks = 2,
766   .simulator_id = 17,
767};
768
769static const struct gen_device_info gen_device_info_glk_2x6 = {
770   GEN9_LP_FEATURES_2X6,
771   .is_geminilake = true,
772   .l3_banks = 2,
773   .simulator_id = 17,
774};
775
776static const struct gen_device_info gen_device_info_cfl_gt1 = {
777   GEN9_FEATURES,
778   .is_coffeelake = true,
779   .gt = 1,
780
781   .num_slices = 1,
782   .num_subslices = { 2, },
783   .num_eu_per_subslice = 6,
784   .l3_banks = 2,
785   .urb.size = 192,
786   /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
787    * leading to some vertices to go missing if we use too much URB.
788    */
789   .urb.max_entries[MESA_SHADER_VERTEX] = 928,
790   .simulator_id = 24,
791};
792static const struct gen_device_info gen_device_info_cfl_gt2 = {
793   GEN9_FEATURES,
794   .is_coffeelake = true,
795   .gt = 2,
796
797   .num_slices = 1,
798   .num_subslices = { 3, },
799   .num_eu_per_subslice = 8,
800   .l3_banks = 4,
801   .simulator_id = 24,
802};
803
804static const struct gen_device_info gen_device_info_cfl_gt3 = {
805   GEN9_FEATURES,
806   .is_coffeelake = true,
807   .gt = 3,
808
809   .num_slices = 2,
810   .num_subslices = { 3, 3, },
811   .num_eu_per_subslice = 8,
812   .l3_banks = 8,
813   .simulator_id = 24,
814};
815
/* Gen10 hardware limits: thread counts, timestamp frequency and the URB
 * configuration.  Used after GEN8_FEATURES so that these values replace the
 * Gen8 ones.
 */
#define GEN10_HW_INFO                        \
   .gen = 10,                                \
   .timestamp_frequency = 19200000,          \
   .num_thread_per_eu = 7,                   \
   .max_vs_threads = 728,                    \
   .max_tcs_threads = 432,                   \
   .max_tes_threads = 624,                   \
   .max_gs_threads = 432,                    \
   .max_cs_threads = 56,                     \
   .urb = {                                  \
      .size = 256,                           \
      .min_entries = {                       \
         [MESA_SHADER_VERTEX]    = 64,       \
         [MESA_SHADER_TESS_EVAL] = 34,       \
      },                                     \
      .max_entries = {                       \
         [MESA_SHADER_VERTEX]    = 3936,     \
         [MESA_SHADER_TESS_CTRL] = 896,      \
         [MESA_SHADER_TESS_EVAL] = 2064,     \
         [MESA_SHADER_GEOMETRY]  = 832,      \
      },                                     \
   }
838
/* Wrap a comma-separated list of per-slice subslice counts in braces so the
 * list can be passed as ONE argument to GEN10_FEATURES()/GEN11_FEATURES().
 * Uses the ISO C99 variadic form rather than the GNU-only "args..." syntax.
 */
#define subslices(...) { __VA_ARGS__, }
840
/* Full Gen10 initializer.
 *
 * _gt, _slices and _l3 fill .gt, .num_slices and .l3_banks; _subslices is a
 * braced list (see subslices()) for .num_subslices.  Every configuration
 * built with this macro has 8 EUs per subslice.
 */
#define GEN10_FEATURES(_gt, _slices, _subslices, _l3)  \
   GEN8_FEATURES,                                      \
   GEN10_HW_INFO,                                      \
   .has_sample_with_hiz = true,                        \
   .gt = _gt,                                          \
   .l3_banks = _l3,                                    \
   .num_slices = _slices,                              \
   .num_subslices = _subslices,                        \
   .num_eu_per_subslice = 8
850
851static const struct gen_device_info gen_device_info_cnl_2x8 = {
852   /* GT0.5 */
853   GEN10_FEATURES(1, 1, subslices(2), 2),
854   .is_cannonlake = true,
855   .simulator_id = 15,
856};
857
858static const struct gen_device_info gen_device_info_cnl_3x8 = {
859   /* GT1 */
860   GEN10_FEATURES(1, 1, subslices(3), 3),
861   .is_cannonlake = true,
862   .simulator_id = 15,
863};
864
865static const struct gen_device_info gen_device_info_cnl_4x8 = {
866   /* GT 1.5 */
867   GEN10_FEATURES(1, 2, subslices(2, 2), 6),
868   .is_cannonlake = true,
869   .simulator_id = 15,
870};
871
872static const struct gen_device_info gen_device_info_cnl_5x8 = {
873   /* GT2 */
874   GEN10_FEATURES(2, 2, subslices(3, 2), 6),
875   .is_cannonlake = true,
876   .simulator_id = 15,
877};
878
/* Gen11 hardware limits.  Used after GEN8_FEATURES: it replaces the thread
 * limits and turns PLN off.
 */
#define GEN11_HW_INFO               \
   .gen = 11,                       \
   .has_pln = false,                \
   .max_vs_threads = 364,           \
   .max_tcs_threads = 224,          \
   .max_tes_threads = 364,          \
   .max_gs_threads = 224,           \
   .max_cs_threads = 56
887
/* Full Gen11 initializer.
 *
 * _gt, _slices and _l3 fill .gt, .num_slices and .l3_banks; _subslices is a
 * braced list (see subslices()) for .num_subslices.  The default of 8 EUs
 * per subslice can be overridden by a later designator.
 */
#define GEN11_FEATURES(_gt, _slices, _subslices, _l3)  \
   GEN8_FEATURES,                                      \
   GEN11_HW_INFO,                                      \
   .has_64bit_types = false,                           \
   .has_integer_dword_mul = false,                     \
   .has_sample_with_hiz = false,                       \
   .gt = _gt,                                          \
   .l3_banks = _l3,                                    \
   .num_slices = _slices,                              \
   .num_subslices = _subslices,                        \
   .num_eu_per_subslice = 8
897
/* Per-stage URB entry bounds shared by the Gen11 entries.  Meant to be
 * expanded inside a ".urb = { ... }" initializer next to ".size".
 */
#define GEN11_URB_MIN_MAX_ENTRIES               \
   .min_entries = {                             \
      [MESA_SHADER_VERTEX]    = 64,             \
      [MESA_SHADER_TESS_EVAL] = 34,             \
   },                                           \
   .max_entries = {                             \
      [MESA_SHADER_VERTEX]    = 2384,           \
      [MESA_SHADER_TESS_CTRL] = 1032,           \
      [MESA_SHADER_TESS_EVAL] = 2384,           \
      [MESA_SHADER_GEOMETRY]  = 1032,           \
   }
909
910static const struct gen_device_info gen_device_info_icl_8x8 = {
911   GEN11_FEATURES(2, 1, subslices(8), 8),
912   .urb = {
913      .size = 1024,
914      GEN11_URB_MIN_MAX_ENTRIES,
915   },
916   .simulator_id = 19,
917};
918
919static const struct gen_device_info gen_device_info_icl_6x8 = {
920   GEN11_FEATURES(1, 1, subslices(6), 6),
921   .urb = {
922      .size = 768,
923      GEN11_URB_MIN_MAX_ENTRIES,
924   },
925   .simulator_id = 19,
926};
927
928static const struct gen_device_info gen_device_info_icl_4x8 = {
929   GEN11_FEATURES(1, 1, subslices(4), 6),
930   .urb = {
931      .size = 768,
932      GEN11_URB_MIN_MAX_ENTRIES,
933   },
934   .simulator_id = 19,
935};
936
937static const struct gen_device_info gen_device_info_icl_1x8 = {
938   GEN11_FEATURES(1, 1, subslices(1), 6),
939   .urb = {
940      .size = 768,
941      GEN11_URB_MIN_MAX_ENTRIES,
942   },
943   .simulator_id = 19,
944};
945
946static const struct gen_device_info gen_device_info_ehl_4x8 = {
947   GEN11_FEATURES(1, 1, subslices(4), 4),
948   .urb = {
949      .size = 512,
950      .min_entries = {
951         [MESA_SHADER_VERTEX]    = 64,
952         [MESA_SHADER_TESS_EVAL] = 34,
953      },
954      .max_entries = {
955         [MESA_SHADER_VERTEX]    = 2384,
956         [MESA_SHADER_TESS_CTRL] = 1032,
957         [MESA_SHADER_TESS_EVAL] = 2384,
958         [MESA_SHADER_GEOMETRY]  = 1032,
959      },
960   },
961   .simulator_id = 28,
962};
963
964/* FIXME: Verfiy below entries when more information is available for this SKU.
965 */
966static const struct gen_device_info gen_device_info_ehl_4x4 = {
967   GEN11_FEATURES(1, 1, subslices(4), 4),
968   .urb = {
969      .size = 512,
970      .min_entries = {
971         [MESA_SHADER_VERTEX]    = 64,
972         [MESA_SHADER_TESS_EVAL] = 34,
973      },
974      .max_entries = {
975         [MESA_SHADER_VERTEX]    = 2384,
976         [MESA_SHADER_TESS_CTRL] = 1032,
977         [MESA_SHADER_TESS_EVAL] = 2384,
978         [MESA_SHADER_GEOMETRY]  = 1032,
979      },
980   },
981   .num_eu_per_subslice = 4,
982   .simulator_id = 28,
983};
984
/* FIXME: Verify the entries below when more information is available for
 * this SKU.
 */
987static const struct gen_device_info gen_device_info_ehl_2x4 = {
988   GEN11_FEATURES(1, 1, subslices(2), 4),
989   .urb = {
990      .size = 512,
991      .min_entries = {
992         [MESA_SHADER_VERTEX]    = 64,
993         [MESA_SHADER_TESS_EVAL] = 34,
994      },
995      .max_entries = {
996         [MESA_SHADER_VERTEX]    = 2384,
997         [MESA_SHADER_TESS_CTRL] = 1032,
998         [MESA_SHADER_TESS_EVAL] = 2384,
999         [MESA_SHADER_GEOMETRY]  = 1032,
1000      },
1001   },
1002   .num_eu_per_subslice =4,
1003   .simulator_id = 28,
1004};
1005
1006static void
1007gen_device_info_set_eu_mask(struct gen_device_info *devinfo,
1008                            unsigned slice,
1009                            unsigned subslice,
1010                            unsigned eu_mask)
1011{
1012   unsigned subslice_offset = slice * devinfo->eu_slice_stride +
1013      subslice * devinfo->eu_subslice_stride;
1014
1015   for (unsigned b_eu = 0; b_eu < devinfo->eu_subslice_stride; b_eu++) {
1016      devinfo->eu_masks[subslice_offset + b_eu] =
1017         (((1U << devinfo->num_eu_per_subslice) - 1) >> (b_eu * 8)) & 0xff;
1018   }
1019}
1020
1021/* Generate slice/subslice/eu masks from number of
1022 * slices/subslices/eu_per_subslices in the per generation/gt gen_device_info
1023 * structure.
1024 *
1025 * These can be overridden with values reported by the kernel either from
1026 * getparam SLICE_MASK/SUBSLICE_MASK values or from the kernel version 4.17+
1027 * through the i915 query uapi.
1028 */
1029static void
1030fill_masks(struct gen_device_info *devinfo)
1031{
1032   devinfo->slice_masks = (1U << devinfo->num_slices) - 1;
1033
1034   /* Subslice masks */
1035   unsigned max_subslices = 0;
1036   for (int s = 0; s < devinfo->num_slices; s++)
1037      max_subslices = MAX2(devinfo->num_subslices[s], max_subslices);
1038   devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8);
1039
1040   for (int s = 0; s < devinfo->num_slices; s++) {
1041      devinfo->subslice_masks[s * devinfo->subslice_slice_stride] =
1042         (1U << devinfo->num_subslices[s]) - 1;
1043   }
1044
1045   /* EU masks */
1046   devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8);
1047   devinfo->eu_slice_stride = max_subslices * devinfo->eu_subslice_stride;
1048
1049   for (int s = 0; s < devinfo->num_slices; s++) {
1050      for (int ss = 0; ss < devinfo->num_subslices[s]; ss++) {
1051         gen_device_info_set_eu_mask(devinfo, s, ss,
1052                                     (1U << devinfo->num_eu_per_subslice) - 1);
1053      }
1054   }
1055}
1056
1057void
1058gen_device_info_update_from_masks(struct gen_device_info *devinfo,
1059                                  uint32_t slice_mask,
1060                                  uint32_t subslice_mask,
1061                                  uint32_t n_eus)
1062{
1063   struct {
1064      struct drm_i915_query_topology_info base;
1065   } topology;
1066
1067   assert((slice_mask & 0xff) == slice_mask);
1068
1069   memset(&topology, 0, sizeof(topology));
1070
1071   topology.base.max_slices = util_last_bit(slice_mask);
1072   topology.base.max_subslices = util_last_bit(subslice_mask);
1073
1074   topology.base.subslice_offset = DIV_ROUND_UP(topology.base.max_slices, 8);
1075   topology.base.subslice_stride = DIV_ROUND_UP(topology.base.max_subslices, 8);
1076
1077   uint32_t n_subslices = __builtin_popcount(slice_mask) *
1078      __builtin_popcount(subslice_mask);
1079   uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1080   uint32_t eu_mask = (1U << num_eu_per_subslice) - 1;
1081
1082   topology.base.eu_offset = topology.base.subslice_offset +
1083      DIV_ROUND_UP(topology.base.max_subslices, 8);
1084   topology.base.eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8);
1085
1086   /* Set slice mask in topology */
1087   for (int b = 0; b < topology.base.subslice_offset; b++)
1088      topology.base.data[b] = (slice_mask >> (b * 8)) & 0xff;
1089
1090   for (int s = 0; s < topology.base.max_slices; s++) {
1091
1092      /* Set subslice mask in topology */
1093      for (int b = 0; b < topology.base.subslice_stride; b++) {
1094         int subslice_offset = topology.base.subslice_offset +
1095            s * topology.base.subslice_stride + b;
1096
1097         topology.base.data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff;
1098      }
1099
1100      /* Set eu mask in topology */
1101      for (int ss = 0; ss < topology.base.max_subslices; ss++) {
1102         for (int b = 0; b < topology.base.eu_stride; b++) {
1103            int eu_offset = topology.base.eu_offset +
1104               (s * topology.base.max_subslices + ss) * topology.base.eu_stride + b;
1105
1106            topology.base.data[eu_offset] = (eu_mask >> (b * 8)) & 0xff;
1107         }
1108      }
1109   }
1110
1111   gen_device_info_update_from_topology(devinfo, &topology.base);
1112}
1113
1114static void
1115reset_masks(struct gen_device_info *devinfo)
1116{
1117   devinfo->subslice_slice_stride = 0;
1118   devinfo->eu_subslice_stride = 0;
1119   devinfo->eu_slice_stride = 0;
1120
1121   devinfo->num_slices = 0;
1122   devinfo->num_eu_per_subslice = 0;
1123   memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
1124
1125   memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
1126   memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
1127   memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
1128}
1129
1130void
1131gen_device_info_update_from_topology(struct gen_device_info *devinfo,
1132                                     const struct drm_i915_query_topology_info *topology)
1133{
1134   reset_masks(devinfo);
1135
1136   devinfo->subslice_slice_stride = topology->subslice_stride;
1137
1138   devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8);
1139   devinfo->eu_slice_stride = topology->max_subslices * devinfo->eu_subslice_stride;
1140
1141   assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8));
1142   memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8));
1143   devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
1144
1145   uint32_t subslice_mask_len =
1146      topology->max_slices * topology->subslice_stride;
1147   assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len);
1148   memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset],
1149          subslice_mask_len);
1150
1151   uint32_t n_subslices = 0;
1152   for (int s = 0; s < topology->max_slices; s++) {
1153      if ((devinfo->slice_masks & (1UL << s)) == 0)
1154         continue;
1155
1156      for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
1157         devinfo->num_subslices[s] +=
1158            __builtin_popcount(devinfo->subslice_masks[b]);
1159      }
1160      n_subslices += devinfo->num_subslices[s];
1161   }
1162   assert(n_subslices > 0);
1163
1164   uint32_t eu_mask_len =
1165      topology->eu_stride * topology->max_subslices * topology->max_slices;
1166   assert(sizeof(devinfo->eu_masks) >= eu_mask_len);
1167   memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], eu_mask_len);
1168
1169   uint32_t n_eus = 0;
1170   for (int b = 0; b < eu_mask_len; b++)
1171      n_eus += __builtin_popcount(devinfo->eu_masks[b]);
1172
1173   devinfo->num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1174}
1175
1176bool
1177gen_get_device_info(int devid, struct gen_device_info *devinfo)
1178{
1179   switch (devid) {
1180#undef CHIPSET
1181#define CHIPSET(id, family, name) \
1182      case id: *devinfo = gen_device_info_##family; break;
1183#include "pci_ids/i965_pci_ids.h"
1184   default:
1185      fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid);
1186      return false;
1187   }
1188
1189   fill_masks(devinfo);
1190
1191   /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
1192    *
1193    * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
1194    *  allocate scratch space enough so that each slice has 4 slices allowed."
1195    *
1196    * The equivalent internal documentation says that this programming note
1197    * applies to all Gen9+ platforms.
1198    *
1199    * The hardware typically calculates the scratch space pointer by taking
1200    * the base address, and adding per-thread-scratch-space * thread ID.
1201    * Extra padding can be necessary depending how the thread IDs are
1202    * calculated for a particular shader stage.
1203    */
1204
1205   switch(devinfo->gen) {
1206   case 9:
1207   case 10:
1208      devinfo->max_wm_threads = 64 /* threads-per-PSD */
1209                              * devinfo->num_slices
1210                              * 4; /* effective subslices per slice */
1211      break;
1212   case 11:
1213      devinfo->max_wm_threads = 128 /* threads-per-PSD */
1214                              * devinfo->num_slices
1215                              * 8; /* subslices per slice */
1216      break;
1217   default:
1218      break;
1219   }
1220
1221   assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));
1222
1223   return true;
1224}
1225
1226const char *
1227gen_get_device_name(int devid)
1228{
1229   switch (devid) {
1230#undef CHIPSET
1231#define CHIPSET(id, family, name) case id: return name;
1232#include "pci_ids/i965_pci_ids.h"
1233   default:
1234      return NULL;
1235   }
1236}
1237