1/*
2 * Copyright © 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
14 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
15 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
16 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * The above copyright notice and this permission notice (including the
22 * next paragraph) shall be included in all copies or substantial portions
23 * of the Software.
24 */
25
26#ifndef AC_SURFACE_H
27#define AC_SURFACE_H
28
29#include "amd_family.h"
30#include "util/format/u_format.h"
31
32/* NIR is optional. Some components don't want to include NIR with ac_surface.h. */
33#ifdef AC_SURFACE_INCLUDE_NIR
34#include "compiler/nir/nir_builder.h"
35#endif
36
37#include <stdbool.h>
38#include <stdint.h>
39#include <stdio.h>
40
41#ifdef __cplusplus
42extern "C" {
43#endif
44
/* Forward declarations.
 * In this header, ac_addrlib and radeon_info are only used through pointers,
 * so their full definitions are not needed here. amdgpu_gpu_info is not
 * referenced by any declaration in this header.
 */
struct ac_addrlib;

struct amdgpu_gpu_info;
struct radeon_info;

/* Capacity of every per-mipmap-level array in the layout structs declared in
 * this header (legacy_surf_layout, gfx9_surf_layout).
 */
#define RADEON_SURF_MAX_LEVELS 15
52
/* Surface mode selector.
 *
 * Taken by ac_compute_surface() as its "mode" argument, written through the
 * "mode" pointer of ac_surface_set_bo_metadata(), and stored per mip level in
 * legacy_surf_level::mode. That field is only 2 bits wide, so the values must
 * stay within 0..3. Value 0 is not assigned to any enumerator.
 */
enum radeon_surf_mode
{
   RADEON_SURF_MODE_LINEAR_ALIGNED = 1,
   RADEON_SURF_MODE_1D = 2,
   RADEON_SURF_MODE_2D = 3,
};
59
/* This describes the D/S/Z/R swizzle modes (Display, Standard, Depth, Render).
 * The enumerators are defined in the GB_TILE_MODEn.MICRO_TILE_MODE_NEW order,
 * which is why every value is spelled out explicitly.
 *
 * radeon_surf::micro_tile_mode is documented with the same four modes and
 * presumably holds one of these values (confirm against its users).
 */
enum radeon_micro_mode
{
   RADEON_MICRO_MODE_DISPLAY = 0,
   RADEON_MICRO_MODE_STANDARD = 1,
   RADEON_MICRO_MODE_DEPTH = 2,
   RADEON_MICRO_MODE_RENDER = 3, /* gfx9 and older: rotated */
};
70
/* Surface flag bits.
 *
 * The first 16 bits are reserved for libdrm_radeon, don't use them.
 *
 * Note on types: the constants for bits 16-21 are plain int expressions, while
 * the constants for bits 22 and up are unsigned long long. RADEON_SURF_PRT is
 * bit 32, so any variable holding a combination of these flags must be 64 bits
 * wide (radeon_surf::flags is declared as uint64_t).
 */
#define RADEON_SURF_SCANOUT      (1 << 16)
#define RADEON_SURF_ZBUFFER      (1 << 17)
#define RADEON_SURF_SBUFFER      (1 << 18)
/* Mask matching either a depth buffer or a stencil buffer. */
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
/* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
#define RADEON_SURF_FMASK                 (1 << 21)
#define RADEON_SURF_DISABLE_DCC           (1ull << 22)
#define RADEON_SURF_TC_COMPATIBLE_HTILE   (1ull << 23)
#define RADEON_SURF_IMPORTED              (1ull << 24)
#define RADEON_SURF_CONTIGUOUS_DCC_LAYERS (1ull << 25)
#define RADEON_SURF_SHAREABLE             (1ull << 26)
#define RADEON_SURF_NO_RENDER_TARGET      (1ull << 27)
/* Force a swizzle mode (gfx9+) or tile mode (gfx6-8).
 * If this is not set, optimize for space. */
#define RADEON_SURF_FORCE_SWIZZLE_MODE    (1ull << 28)
#define RADEON_SURF_NO_FMASK              (1ull << 29)
#define RADEON_SURF_NO_HTILE              (1ull << 30)
#define RADEON_SURF_FORCE_MICRO_TILE_MODE (1ull << 31)
/* First flag above bit 31; it does not fit in a 32-bit flags variable. */
#define RADEON_SURF_PRT                   (1ull << 32)
91
/* Description of one mipmap level in the legacy layout.
 * Used for legacy_surf_layout::level[] and legacy_surf_layout::zs.stencil_level[].
 */
struct legacy_surf_level {
   uint32_t offset_256B;   /* offset divided by 256, the hw can only do 40-bit addresses */
   uint32_t slice_size_dw; /* slice size in dwords; max = 4GB / 4. */
   unsigned nblk_x : 15;   /* presumably the level width in blocks - TODO confirm */
   unsigned nblk_y : 15;   /* presumably the level height in blocks - TODO confirm */
   /* Must be able to hold RADEON_SURF_MODE_2D (3), which needs both bits.
    * NOTE(review): whether an enum bit-field is signed is implementation-defined;
    * confirm that every supported compiler treats this field as unsigned.
    */
   enum radeon_surf_mode mode : 2;
};
99
/* Per-mipmap-level DCC information in the legacy layout
 * (element type of legacy_surf_layout::color.dcc_level[]).
 */
struct legacy_surf_dcc_level {
   uint32_t dcc_offset;    /* relative offset within DCC mip tree */
   /* The two sizes below are presumably in bytes, for the whole level and for
    * a single slice respectively - TODO confirm against the implementation.
    */
   uint32_t dcc_fast_clear_size;
   uint32_t dcc_slice_fast_clear_size;
};
105
/* FMASK parameters in the legacy layout (legacy_surf_layout::color.fmask).
 * The "max" notes give the largest value each field is expected to hold.
 */
struct legacy_surf_fmask {
   unsigned slice_tile_max; /* max 4M */
   uint8_t tiling_index;    /* max 31 */
   uint8_t bankh;           /* max 8 */
   uint16_t pitch_in_pixels;
};
112
/* Surface layout for GFX8 and older; stored in radeon_surf::u.legacy.
 *
 * According to the comments in radeon_surf, some of these fields (e.g. bankw,
 * bankh) may be pre-set by the caller as hints and may be changed by the
 * calculator.
 */
struct legacy_surf_layout {
   /* Tiling parameters. The "max" notes give the largest expected value,
    * not the capacity of the bit-field.
    */
   unsigned bankw : 4;               /* max 8 */
   unsigned bankh : 4;               /* max 8 */
   unsigned mtilea : 4;              /* max 8 */
   unsigned tile_split : 13;         /* max 4K */
   unsigned stencil_tile_split : 13; /* max 4K */
   unsigned pipe_config : 5;         /* max 17 */
   unsigned num_banks : 5;           /* max 16 */
   unsigned macro_tile_index : 4;    /* max 15 */

   /* Whether the depth miptree or stencil miptree as used by the DB are
    * adjusted from their TC compatible form to ensure depth/stencil
    * compatibility. If either is true, the corresponding plane cannot be
    * sampled from.
    */
   unsigned depth_adjusted : 1;
   unsigned stencil_adjusted : 1;

   /* Per-mipmap-level data, indexed by level. */
   struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
   uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];

   /* Anonymous union: "color" and "zs" share the same storage, so only the
    * member matching the surface kind is meaningful.
    */
   union {
      /* Color layout */
      struct {
         struct legacy_surf_dcc_level dcc_level[RADEON_SURF_MAX_LEVELS];
         struct legacy_surf_fmask fmask;
         unsigned cmask_slice_tile_max;
      } color;

      /* Z/S layout */
      struct {
         struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
         uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
      } zs;
   };
};
149
/* Resource dimensionality used by the gfx9 layout (gfx9_surf_layout::resource_type).
 * Same as addrlib - AddrResourceType, so every value is written out explicitly.
 */
enum gfx9_resource_type
{
   RADEON_RESOURCE_1D = 0,
   RADEON_RESOURCE_2D = 1,
   RADEON_RESOURCE_3D = 2,
};
157
/* Metadata layout flags; used for DCC as gfx9_surf_layout::color.dcc.
 * Seven bits are declared in total.
 */
struct gfx9_surf_meta_flags {
   uint8_t rb_aligned : 1;   /* optimal for RBs */
   uint8_t pipe_aligned : 1; /* optimal for TC */
   uint8_t independent_64B_blocks : 1;
   uint8_t independent_128B_blocks : 1;
   uint8_t max_compressed_block_size : 2; /* 2-bit encoded value; encoding not defined in this header */
   /* NOTE(review): presumably tells whether gfx9_surf_layout::color.display_dcc_equation
    * was filled in - confirm against the implementation.
    */
   uint8_t display_equation_valid : 1;
};
166
/* Offset/size pair describing one metadata level in the gfx9 layout.
 * Used for gfx9_surf_layout::meta_levels[] (DCC or HTILE) and
 * gfx9_surf_layout::color.cmask_level0.
 */
struct gfx9_surf_level {
   unsigned offset;
   unsigned size; /* the size of one level in one layer (the image is an array of layers
                   * where each layer has an array of levels) */
};
172
/**
 * Meta address equation.
 *
 * DCC/HTILE address equation for doing DCC/HTILE address computations in shaders.
 * The same type is also used for CMASK in this header
 * (gfx9_surf_layout::color.cmask_equation, ac_nir_cmask_addr_from_coord).
 *
 * ac_surface_meta_address_test.c contains the reference implementation.
 * ac_nir_{dcc,cmask,htile}_addr_from_coord are the NIR implementations.
 *
 * For DCC:
 * The gfx9 equation doesn't support mipmapping.
 * The gfx10 equation doesn't support mipmapping and MSAA.
 * (those are also limitations of Addr2ComputeDccAddrFromCoord)
 *
 * For HTILE:
 * The gfx9 equation isn't implemented.
 * The gfx10 equation doesn't support mipmapping.
 */
struct gfx9_meta_equation {
   /* Dimensions of one metadata block. */
   uint16_t meta_block_width;
   uint16_t meta_block_height;
   uint16_t meta_block_depth;

   /* Only one of the two representations below is valid at a time; they
    * share storage.
    */
   union {
      /* The gfx9 DCC equation is chip-specific, and it varies with:
       * - resource type
       * - swizzle_mode
       * - bpp
       * - number of samples
       * - number of fragments
       * - pipe_aligned
       * - rb_aligned
       */
      struct {
         uint8_t num_bits;      /* number of valid entries in bit[] */
         uint8_t num_pipe_bits;

         /* Each address bit is described by up to 5 (dim, ord) coordinate
          * terms. How the terms are combined is defined by the
          * implementation, not by this header.
          */
         struct {
            struct {
               uint8_t dim:3; /* 0..4 */
               uint8_t ord:5; /* 0..31 */
            } coord[5]; /* 0..num_coords-1 */
         } bit[20]; /* 0..num_bits-1 */
      } gfx9;

      /* The gfx10 DCC equation is chip-specific, it requires 64KB_R_X, and it varies with:
       * - bpp
       * - number of samples
       * - number of fragments
       * - pipe_aligned
       *
       * The gfx10 HTILE equation is chip-specific, it requires 64KB_Z_X, and it varies with:
       * - number of samples
       */
      uint16_t gfx10_bits[64];
   } u;
};
229
/* Surface layout for GFX9 and newer; stored in radeon_surf::u.gfx9. */
struct gfx9_surf_layout {
   uint16_t epitch;           /* gfx9 only, not on gfx10 */
   uint8_t swizzle_mode;      /* color or depth */

   enum gfx9_resource_type resource_type:8; /* 1D, 2D or 3D */
   uint16_t surf_pitch;                   /* in blocks */
   uint16_t surf_height;

   uint64_t surf_offset; /* 0 unless imported with an offset */
   /* The size of the 2D plane containing all mipmap levels. */
   uint64_t surf_slice_size;
   /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
   uint32_t offset[RADEON_SURF_MAX_LEVELS];
   /* Mipmap level pitch in elements. Only valid for LINEAR. */
   uint16_t pitch[RADEON_SURF_MAX_LEVELS];

   uint16_t base_mip_width;
   uint16_t base_mip_height;

   /* Pitch of level in blocks, only valid for prt images. */
   uint16_t prt_level_pitch[RADEON_SURF_MAX_LEVELS];
   /* Offset within slice in bytes, only valid for prt images. */
   uint32_t prt_level_offset[RADEON_SURF_MAX_LEVELS];

   /* DCC or HTILE level info */
   struct gfx9_surf_level meta_levels[RADEON_SURF_MAX_LEVELS];

   /* Anonymous union: "color" and "zs" share the same storage, so only the
    * member matching the surface kind is meaningful.
    */
   union {
      /* Color */
      struct {
         struct gfx9_surf_meta_flags dcc; /* metadata of color */
         uint8_t fmask_swizzle_mode;
         uint16_t fmask_epitch;     /* gfx9 only, not on gfx10 */

         uint16_t dcc_pitch_max;
         uint16_t dcc_height;

         uint8_t dcc_block_width;
         uint8_t dcc_block_height;
         uint8_t dcc_block_depth;

         /* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
          * The 3D engine doesn't support that layout except for chips with 1 RB.
          * All other chips must set rb_aligned=1.
          * A compute shader needs to convert from aligned DCC to unaligned.
          */
         uint8_t display_dcc_alignment_log2; /* alignment is (1 << value) */
         uint32_t display_dcc_size;
         uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */
         uint16_t display_dcc_height;
         bool dcc_retile_use_uint16;     /* if all values fit into uint16_t */
         uint32_t dcc_retile_num_elements;
         /* NOTE(review): who allocates and frees this map is not visible in
          * this header - confirm ownership before copying the struct.
          */
         void *dcc_retile_map;

         /* CMASK level info (only level 0) */
         struct gfx9_surf_level cmask_level0;

         /* For DCC retiling. */
         struct gfx9_meta_equation dcc_equation; /* 2D only */
         struct gfx9_meta_equation display_dcc_equation;

         /* For FCE compute. */
         struct gfx9_meta_equation cmask_equation; /* 2D only */
      } color;

      /* Z/S */
      struct {
         uint64_t stencil_offset; /* separate stencil */
         uint16_t stencil_epitch;   /* gfx9 only, not on gfx10 */
         uint8_t stencil_swizzle_mode;

         /* For HTILE VRS. */
         struct gfx9_meta_equation htile_equation;
      } zs;
   };
};
306
/* Description of one surface: format properties, sizes/offsets/alignments of
 * the main surface and of its metadata (FMASK, CMASK, DCC or HTILE), plus a
 * generation-specific layout in the trailing union.
 *
 * Filled in by ac_compute_surface() through its "surf" argument.
 */
struct radeon_surf {
   /* Format properties. */
   uint8_t blk_w : 4;
   uint8_t blk_h : 4;
   uint8_t bpe : 5;
   /* Display, standard(thin), depth, render(rotated). AKA D,S,Z,R swizzle modes. */
   uint8_t micro_tile_mode : 3;
   /* Number of mipmap levels where DCC or HTILE is enabled starting from level 0.
    * Non-zero levels may be disabled due to alignment constraints, but not
    * the first level.
    */
   uint8_t num_meta_levels : 4;
   uint8_t is_linear : 1;
   uint8_t has_stencil : 1;
   /* This might be true even if micro_tile_mode isn't displayable or rotated. */
   uint8_t is_displayable : 1;
   uint8_t first_mip_tail_level : 4;

   /* These are return values. Some of them can be set by the caller, but
    * they will be treated as hints (e.g. bankw, bankh) and might be
    * changed by the calculator.
    */

   /* Not supported yet for depth + stencil. */
   uint16_t prt_tile_width;
   uint16_t prt_tile_height;

   /* Tile swizzle can be OR'd with low bits of the BASE_256B address.
    * The value is the same for all mipmap levels. Supported tile modes:
    * - GFX6: Only macro tiling.
    * - GFX9: Only *_X and *_T swizzle modes. Level 0 must not be in the mip
    *   tail.
    *
    * Only these surfaces are allowed to set it:
    * - color (if it doesn't have to be displayable)
    * - DCC (same tile swizzle as color)
    * - FMASK
    * - CMASK if it's TC-compatible or if the gen is GFX9
    * - depth/stencil if HTILE is not TC-compatible and if the gen is not GFX9
    */
   uint8_t tile_swizzle;
   uint8_t fmask_tile_swizzle;

   /* Use (1 << log2) to compute the alignment. */
   uint8_t surf_alignment_log2;
   uint8_t fmask_alignment_log2;
   uint8_t meta_alignment_log2; /* DCC or HTILE */
   uint8_t cmask_alignment_log2;
   uint8_t alignment_log2;

   /* DRM format modifier. Set to DRM_FORMAT_MOD_INVALID to have addrlib
    * select tiling parameters instead.
    */
   uint64_t modifier;
   /* 64 bits wide because RADEON_SURF_PRT is bit 32; presumably a combination
    * of the RADEON_SURF_* flags - confirm against the users of this field.
    */
   uint64_t flags;

   uint64_t surf_size;
   uint64_t fmask_size;
   uint32_t fmask_slice_size; /* max 2^31 (16K * 16K * 8) */

   /* DCC and HTILE (they are very small) */
   uint32_t meta_size;
   uint32_t meta_slice_size;
   uint32_t meta_pitch;

   uint32_t cmask_size;
   uint32_t cmask_slice_size;
   uint16_t cmask_pitch; /* GFX9+ */
   uint16_t cmask_height; /* GFX9+ */

   /* All buffers combined. */
   uint64_t meta_offset; /* DCC or HTILE */
   uint64_t fmask_offset;
   uint64_t cmask_offset;
   uint64_t display_dcc_offset;
   uint64_t total_size;

   /* Generation-specific layout; the two members share storage. */
   union {
      /* Return values for GFX8 and older.
       *
       * Some of them can be set by the caller if certain parameters are
       * desirable. The allocator will try to obey them.
       */
      struct legacy_surf_layout legacy;

      /* GFX9+ return values. */
      struct gfx9_surf_layout gfx9;
   } u;
};
396
/* Dimensions and sample/level counts of the surface to be computed.
 * Embedded in ac_surf_config, which is what ac_compute_surface() takes.
 */
struct ac_surf_info {
   uint32_t width;
   uint32_t height;
   uint32_t depth;
   uint8_t samples;         /* For Z/S: samples; For color: FMASK coverage samples */
   uint8_t storage_samples; /* For color: allocated samples */
   uint8_t levels;          /* presumably the number of mipmap levels - TODO confirm */
   uint8_t num_channels; /* heuristic for displayability */
   uint16_t array_size;
   /* The two pointers below are non-const, so the pointed-to counters can be
    * modified through them.
    */
   uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */
   uint32_t *fmask_surf_index;
};
409
/* Input configuration for ac_compute_surface(): the surface dimensions plus
 * one-bit flags describing the resource kind.
 */
struct ac_surf_config {
   struct ac_surf_info info;
   unsigned is_1d : 1;
   unsigned is_3d : 1;
   unsigned is_cube : 1;
};
416
/* Lifetime management of the opaque ac_addrlib object.
 * ac_addrlib_create() takes a non-const "max_alignment" pointer, i.e. it can
 * write a value back to the caller. NOTE(review): failure behaviour (e.g. a
 * NULL return) is not visible in this header - confirm in the implementation.
 */
struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info, uint64_t *max_alignment);
void ac_addrlib_destroy(struct ac_addrlib *addrlib);
void *ac_addrlib_get_handle(struct ac_addrlib *addrlib);

/* Compute the layout of a surface described by "config" and "mode" into "surf".
 * "info" and "config" are read-only; "surf" is both input (caller hints, see
 * radeon_surf) and output. NOTE(review): the meaning of the int return value
 * is not visible in this header - confirm in the implementation.
 */
int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
                       const struct ac_surf_config *config, enum radeon_surf_mode mode,
                       struct radeon_surf *surf);
void ac_surface_zero_dcc_fields(struct radeon_surf *surf);

/* Conversion between a surface description and BO tiling metadata.
 * The "set" variant takes tiling_flags by value and writes "surf" and "*mode";
 * the "get" variant writes "*tiling_flags".
 */
void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                uint64_t tiling_flags, enum radeon_surf_mode *mode);
void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                uint64_t *tiling_flags);

/* Conversion between a surface description and UMD metadata.
 * The array sizes in the parameter lists ([64], [8]) document the expected
 * capacity only; array parameters are passed as pointers in C, so the compiler
 * does not enforce them.
 */
bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                 unsigned num_storage_samples, unsigned num_mipmap_levels,
                                 unsigned size_metadata, const uint32_t metadata[64]);
void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                 unsigned num_mipmap_levels, uint32_t desc[8],
                                 unsigned *size_metadata, uint32_t metadata[64]);

/* Apply an externally supplied offset and pitch to "surf".
 * NOTE(review): presumably returns false when the values cannot be applied -
 * confirm in the implementation.
 */
bool ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
                                       unsigned num_mipmap_levels, uint64_t offset, unsigned pitch);
440
/* Options passed to ac_is_modifier_supported() and ac_get_supported_modifiers(). */
struct ac_modifier_options {
   /* Whether to allow DCC. */
   bool dcc;
   /* Whether to allow use of a DCC retile map. */
   bool dcc_retile;
};
445
/* DRM format modifier queries.
 * "options" restricts which modifiers are allowed (see ac_modifier_options).
 * ac_get_supported_modifiers() takes non-const "mod_count" and "mods"
 * pointers, i.e. it reports its results through them. NOTE(review): whether
 * "*mod_count" is also read as the capacity of "mods", and whether "mods" may
 * be NULL, is not visible in this header - confirm in the implementation.
 */
bool ac_is_modifier_supported(const struct radeon_info *info,
                              const struct ac_modifier_options *options,
                              enum pipe_format format,
                              uint64_t modifier);
bool ac_get_supported_modifiers(const struct radeon_info *info,
                                const struct ac_modifier_options *options,
                                enum pipe_format format,
                                unsigned *mod_count,
                                uint64_t *mods);
/* Predicates and limits that depend only on the modifier value
 * (plus "info" for the maximum extent, which is returned via "width"/"height").
 */
bool ac_modifier_has_dcc(uint64_t modifier);
bool ac_modifier_has_dcc_retile(uint64_t modifier);
bool ac_modifier_supports_dcc_image_stores(uint64_t modifier);
void ac_modifier_max_extent(const struct radeon_info *info,
                            uint64_t modifier, uint32_t *width, uint32_t *height);

/* Per-plane queries on a computed surface; "surf" is read-only in all of them. */
unsigned ac_surface_get_nplanes(const struct radeon_surf *surf);
uint64_t ac_surface_get_plane_offset(enum chip_class chip_class,
                                     const struct radeon_surf *surf,
                                     unsigned plane, unsigned layer);
uint64_t ac_surface_get_plane_stride(enum chip_class chip_class,
                                     const struct radeon_surf *surf,
                                     unsigned plane);
/* Of the whole miplevel, not an individual layer */
uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf,
                                   unsigned plane);

/* Print a description of "surf" to the stdio stream "out". */
void ac_surface_print_info(FILE *out, const struct radeon_info *info,
                           const struct radeon_surf *surf);

bool ac_surface_supports_dcc_image_stores(enum chip_class chip_class,
                                          const struct radeon_surf *surf);
477
/* NIR builders for metadata address computation. Only declared when
 * AC_SURFACE_INCLUDE_NIR is defined, which is also what pulls in
 * nir_builder.h at the top of this header.
 *
 * Each function takes a gfx9_meta_equation plus the coordinates as NIR values
 * and returns a NIR value. ac_nir_cmask_addr_from_coord() additionally has a
 * "bit_position" output parameter. NOTE(review): the exact meaning and units
 * of the returned values are not visible in this header - confirm in the
 * implementation.
 */
#ifdef AC_SURFACE_INCLUDE_NIR
nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                        unsigned bpe, struct gfx9_meta_equation *equation,
                                        nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,
                                        nir_ssa_def *dcc_slice_size,
                                        nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                        nir_ssa_def *sample, nir_ssa_def *pipe_xor);

nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                        struct gfx9_meta_equation *equation,
                                        nir_ssa_def *cmask_pitch, nir_ssa_def *cmask_height,
                                        nir_ssa_def *cmask_slice_size,
                                        nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                        nir_ssa_def *pipe_xor,
                                        nir_ssa_def **bit_position);

nir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                          struct gfx9_meta_equation *equation,
                                          nir_ssa_def *htile_pitch,
                                          nir_ssa_def *htile_slice_size,
                                          nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                          nir_ssa_def *pipe_xor);
#endif
501
502#ifdef __cplusplus
503}
504#endif
505
506#endif /* AC_SURFACE_H */
507