101e04c3fSmrg/* 201e04c3fSmrg * Copyright 2017 Advanced Micro Devices, Inc. 301e04c3fSmrg * All Rights Reserved. 401e04c3fSmrg * 501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 601e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 701e04c3fSmrg * to deal in the Software without restriction, including without limitation 801e04c3fSmrg * on the rights to use, copy, modify, merge, publish, distribute, sub 901e04c3fSmrg * license, and/or sell copies of the Software, and to permit persons to whom 1001e04c3fSmrg * the Software is furnished to do so, subject to the following conditions: 1101e04c3fSmrg * 1201e04c3fSmrg * The above copyright notice and this permission notice (including the next 1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1401e04c3fSmrg * Software. 1501e04c3fSmrg * 1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 1901e04c3fSmrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 2001e04c3fSmrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 2101e04c3fSmrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 2201e04c3fSmrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 2301e04c3fSmrg */ 2401e04c3fSmrg 257ec681f3Smrg#include "compiler/nir/nir.h" 2601e04c3fSmrg#include "radeon/radeon_uvd_enc.h" 277ec681f3Smrg#include "radeon/radeon_vce.h" 287ec681f3Smrg#include "radeon/radeon_video.h" 297ec681f3Smrg#include "si_pipe.h" 307ec681f3Smrg#include "util/u_cpu_detect.h" 3101e04c3fSmrg#include "util/u_screen.h" 3201e04c3fSmrg#include "util/u_video.h" 337ec681f3Smrg#include "vl/vl_decoder.h" 347ec681f3Smrg#include "vl/vl_video_buffer.h" 3501e04c3fSmrg#include <sys/utsname.h> 3601e04c3fSmrg 3701e04c3fSmrgstatic const char *si_get_vendor(struct pipe_screen *pscreen) 3801e04c3fSmrg{ 397ec681f3Smrg return "AMD"; 4001e04c3fSmrg} 4101e04c3fSmrg 4201e04c3fSmrgstatic const char *si_get_device_vendor(struct pipe_screen *pscreen) 4301e04c3fSmrg{ 447ec681f3Smrg return "AMD"; 4501e04c3fSmrg} 4601e04c3fSmrg 4701e04c3fSmrgstatic int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) 4801e04c3fSmrg{ 497ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)pscreen; 507ec681f3Smrg 517ec681f3Smrg switch (param) { 527ec681f3Smrg /* Supported features (boolean caps). */ 537ec681f3Smrg case PIPE_CAP_ACCELERATED: 547ec681f3Smrg case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: 557ec681f3Smrg case PIPE_CAP_ANISOTROPIC_FILTER: 567ec681f3Smrg case PIPE_CAP_POINT_SPRITE: 577ec681f3Smrg case PIPE_CAP_OCCLUSION_QUERY: 587ec681f3Smrg case PIPE_CAP_TEXTURE_MIRROR_CLAMP: 597ec681f3Smrg case PIPE_CAP_TEXTURE_SHADOW_LOD: 607ec681f3Smrg case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: 617ec681f3Smrg case PIPE_CAP_BLEND_EQUATION_SEPARATE: 627ec681f3Smrg case PIPE_CAP_TEXTURE_SWIZZLE: 637ec681f3Smrg case PIPE_CAP_DEPTH_CLIP_DISABLE: 647ec681f3Smrg case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: 657ec681f3Smrg case PIPE_CAP_SHADER_STENCIL_EXPORT: 667ec681f3Smrg case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: 677ec681f3Smrg case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: 687ec681f3Smrg case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: 697ec681f3Smrg case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: 707ec681f3Smrg case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: 717ec681f3Smrg case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: 727ec681f3Smrg case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: 737ec681f3Smrg case PIPE_CAP_VERTEX_SHADER_SATURATE: 747ec681f3Smrg case PIPE_CAP_PRIMITIVE_RESTART: 757ec681f3Smrg case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX: 767ec681f3Smrg case PIPE_CAP_CONDITIONAL_RENDER: 777ec681f3Smrg case PIPE_CAP_TEXTURE_BARRIER: 787ec681f3Smrg case PIPE_CAP_INDEP_BLEND_ENABLE: 797ec681f3Smrg case PIPE_CAP_INDEP_BLEND_FUNC: 807ec681f3Smrg case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: 817ec681f3Smrg case PIPE_CAP_START_INSTANCE: 827ec681f3Smrg case PIPE_CAP_NPOT_TEXTURES: 837ec681f3Smrg case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: 847ec681f3Smrg case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: 857ec681f3Smrg case PIPE_CAP_VERTEX_COLOR_CLAMPED: 867ec681f3Smrg case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: 877ec681f3Smrg case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: 887ec681f3Smrg case PIPE_CAP_TGSI_INSTANCEID: 897ec681f3Smrg case PIPE_CAP_COMPUTE: 907ec681f3Smrg case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: 917ec681f3Smrg case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: 927ec681f3Smrg case PIPE_CAP_QUERY_PIPELINE_STATISTICS: 937ec681f3Smrg case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: 947ec681f3Smrg case PIPE_CAP_SAMPLE_SHADING: 957ec681f3Smrg case PIPE_CAP_DRAW_INDIRECT: 967ec681f3Smrg case PIPE_CAP_CLIP_HALFZ: 977ec681f3Smrg case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: 987ec681f3Smrg case PIPE_CAP_POLYGON_OFFSET_CLAMP: 997ec681f3Smrg case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: 1007ec681f3Smrg case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: 1017ec681f3Smrg case PIPE_CAP_TGSI_TEXCOORD: 1027ec681f3Smrg case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: 1037ec681f3Smrg case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: 1047ec681f3Smrg case PIPE_CAP_TEXTURE_FLOAT_LINEAR: 1057ec681f3Smrg case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: 1067ec681f3Smrg case PIPE_CAP_DEPTH_BOUNDS_TEST: 1077ec681f3Smrg case PIPE_CAP_SAMPLER_VIEW_TARGET: 1087ec681f3Smrg case PIPE_CAP_TEXTURE_QUERY_LOD: 1097ec681f3Smrg case PIPE_CAP_TEXTURE_GATHER_SM5: 1107ec681f3Smrg case PIPE_CAP_TGSI_TXQS: 1117ec681f3Smrg case PIPE_CAP_FORCE_PERSAMPLE_INTERP: 1127ec681f3Smrg case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: 1137ec681f3Smrg case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: 1147ec681f3Smrg case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: 1157ec681f3Smrg case PIPE_CAP_INVALIDATE_BUFFER: 1167ec681f3Smrg case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: 1177ec681f3Smrg case PIPE_CAP_QUERY_BUFFER_OBJECT: 1187ec681f3Smrg case PIPE_CAP_QUERY_MEMORY_INFO: 1197ec681f3Smrg case PIPE_CAP_TGSI_PACK_HALF_FLOAT: 1207ec681f3Smrg case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: 1217ec681f3Smrg case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: 1227ec681f3Smrg case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: 1237ec681f3Smrg case PIPE_CAP_STRING_MARKER: 1247ec681f3Smrg case PIPE_CAP_CLEAR_TEXTURE: 1257ec681f3Smrg case PIPE_CAP_CULL_DISTANCE: 1267ec681f3Smrg case PIPE_CAP_TGSI_ARRAY_COMPONENTS: 1277ec681f3Smrg case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: 1287ec681f3Smrg case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: 1297ec681f3Smrg case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: 1307ec681f3Smrg case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: 1317ec681f3Smrg case PIPE_CAP_DOUBLES: 1327ec681f3Smrg case PIPE_CAP_TGSI_TEX_TXF_LZ: 1337ec681f3Smrg case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: 1347ec681f3Smrg case PIPE_CAP_BINDLESS_TEXTURE: 1357ec681f3Smrg case PIPE_CAP_QUERY_TIMESTAMP: 1367ec681f3Smrg case PIPE_CAP_QUERY_TIME_ELAPSED: 1377ec681f3Smrg case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: 1387ec681f3Smrg case PIPE_CAP_MEMOBJ: 1397ec681f3Smrg case PIPE_CAP_LOAD_CONSTBUF: 1407ec681f3Smrg case PIPE_CAP_INT64: 1417ec681f3Smrg case PIPE_CAP_INT64_DIVMOD: 1427ec681f3Smrg case PIPE_CAP_TGSI_CLOCK: 1437ec681f3Smrg case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: 1447ec681f3Smrg case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: 1457ec681f3Smrg case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: 1467ec681f3Smrg case PIPE_CAP_TGSI_BALLOT: 1477ec681f3Smrg case PIPE_CAP_TGSI_VOTE: 1487ec681f3Smrg case PIPE_CAP_FBFETCH: 1497ec681f3Smrg case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK: 1507ec681f3Smrg case PIPE_CAP_IMAGE_LOAD_FORMATTED: 1517ec681f3Smrg case PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA: 1527ec681f3Smrg case PIPE_CAP_TGSI_DIV: 1537ec681f3Smrg case PIPE_CAP_PACKED_UNIFORMS: 1547ec681f3Smrg case PIPE_CAP_SHADER_SAMPLES_IDENTICAL: 1557ec681f3Smrg case PIPE_CAP_GL_SPIRV: 1567ec681f3Smrg case PIPE_CAP_ALPHA_TO_COVERAGE_DITHER_CONTROL: 1577ec681f3Smrg case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE: 1587ec681f3Smrg case PIPE_CAP_NO_CLIP_ON_COPY_TEX: 1597ec681f3Smrg case PIPE_CAP_SHADER_ATOMIC_INT64: 1607ec681f3Smrg case PIPE_CAP_FRONTEND_NOOP: 1617ec681f3Smrg case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION: 1627ec681f3Smrg case PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0: 1637ec681f3Smrg case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES: 1647ec681f3Smrg case PIPE_CAP_TGSI_ATOMINC_WRAP: 1657ec681f3Smrg return 1; 1667ec681f3Smrg 1677ec681f3Smrg case PIPE_CAP_DRAW_VERTEX_STATE: 1687ec681f3Smrg return !(sscreen->debug_flags & DBG(NO_FAST_DISPLAY_LIST)); 1697ec681f3Smrg 1707ec681f3Smrg case PIPE_CAP_GLSL_ZERO_INIT: 1717ec681f3Smrg return 2; 1727ec681f3Smrg 1737ec681f3Smrg case PIPE_CAP_GENERATE_MIPMAP: 1747ec681f3Smrg case PIPE_CAP_SEAMLESS_CUBE_MAP: 1757ec681f3Smrg case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: 1767ec681f3Smrg case PIPE_CAP_CUBE_MAP_ARRAY: 1777ec681f3Smrg return sscreen->info.has_3d_cube_border_color_mipmap; 1787ec681f3Smrg 1797ec681f3Smrg case PIPE_CAP_QUERY_SO_OVERFLOW: 1807ec681f3Smrg return !sscreen->use_ngg_streamout; 1817ec681f3Smrg 1827ec681f3Smrg case PIPE_CAP_POST_DEPTH_COVERAGE: 1837ec681f3Smrg return sscreen->info.chip_class >= GFX10; 1847ec681f3Smrg 1857ec681f3Smrg case PIPE_CAP_GRAPHICS: 1867ec681f3Smrg return sscreen->info.has_graphics; 1877ec681f3Smrg 1887ec681f3Smrg case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: 1897ec681f3Smrg return !SI_BIG_ENDIAN && sscreen->info.has_userptr; 1907ec681f3Smrg 1917ec681f3Smrg case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: 1927ec681f3Smrg return sscreen->info.has_gpu_reset_status_query; 1937ec681f3Smrg 1947ec681f3Smrg case PIPE_CAP_DEVICE_PROTECTED_CONTENT: 1957ec681f3Smrg return sscreen->info.has_tmz_support; 1967ec681f3Smrg 1977ec681f3Smrg case PIPE_CAP_TEXTURE_MULTISAMPLE: 1987ec681f3Smrg return sscreen->info.has_2d_tiling; 1997ec681f3Smrg 2007ec681f3Smrg case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: 2017ec681f3Smrg return SI_MAP_BUFFER_ALIGNMENT; 2027ec681f3Smrg 2037ec681f3Smrg case PIPE_CAP_MAX_VERTEX_BUFFERS: 2047ec681f3Smrg return SI_MAX_ATTRIBS; 2057ec681f3Smrg 2067ec681f3Smrg case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: 2077ec681f3Smrg case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: 2087ec681f3Smrg case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: 2097ec681f3Smrg case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: 2107ec681f3Smrg case PIPE_CAP_MAX_VERTEX_STREAMS: 2117ec681f3Smrg case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: 2127ec681f3Smrg case PIPE_CAP_MAX_WINDOW_RECTANGLES: 2137ec681f3Smrg return 4; 2147ec681f3Smrg 2157ec681f3Smrg case PIPE_CAP_GLSL_FEATURE_LEVEL: 2167ec681f3Smrg case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: 2177ec681f3Smrg if (!sscreen->info.has_indirect_compute_dispatch) 2187ec681f3Smrg return 420; 2197ec681f3Smrg return 460; 2207ec681f3Smrg 2217ec681f3Smrg case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: 2227ec681f3Smrg /* Optimal number for good TexSubImage performance on Polaris10. */ 2237ec681f3Smrg return 64 * 1024 * 1024; 2247ec681f3Smrg 2257ec681f3Smrg case PIPE_CAP_GL_BEGIN_END_BUFFER_SIZE: 2267ec681f3Smrg return 4096 * 1024; 2277ec681f3Smrg 2287ec681f3Smrg case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: 2297ec681f3Smrg case PIPE_CAP_MAX_SHADER_BUFFER_SIZE: 2307ec681f3Smrg /* Align it down to 256 bytes. I've chosen the number randomly. */ 2317ec681f3Smrg return ROUND_DOWN_TO(MIN2(sscreen->info.max_alloc_size, INT_MAX), 256); 2327ec681f3Smrg case PIPE_CAP_MAX_TEXTURE_MB: 2337ec681f3Smrg return sscreen->info.max_alloc_size / (1024 * 1024); 2347ec681f3Smrg 2357ec681f3Smrg case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: 2367ec681f3Smrg case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: 2377ec681f3Smrg case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: 2387ec681f3Smrg case PIPE_CAP_PREFER_BACK_BUFFER_REUSE: 2397ec681f3Smrg return 0; 2407ec681f3Smrg 2417ec681f3Smrg case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: 2427ec681f3Smrg /* Gfx8 (Polaris11) hangs, so don't enable this on Gfx8 and older chips. */ 2437ec681f3Smrg return sscreen->info.chip_class >= GFX9 && 2447ec681f3Smrg sscreen->info.has_sparse_vm_mappings ? RADEON_SPARSE_PAGE_SIZE : 0; 2457ec681f3Smrg 2467ec681f3Smrg case PIPE_CAP_UMA: 2477ec681f3Smrg case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF: 2487ec681f3Smrg return 0; 2497ec681f3Smrg 2507ec681f3Smrg case PIPE_CAP_FENCE_SIGNAL: 2517ec681f3Smrg return sscreen->info.has_syncobj; 2527ec681f3Smrg 2537ec681f3Smrg case PIPE_CAP_CONSTBUF0_FLAGS: 2547ec681f3Smrg return SI_RESOURCE_FLAG_32BIT; 2557ec681f3Smrg 2567ec681f3Smrg case PIPE_CAP_NATIVE_FENCE_FD: 2577ec681f3Smrg return sscreen->info.has_fence_to_handle; 2587ec681f3Smrg 2597ec681f3Smrg case PIPE_CAP_DRAW_PARAMETERS: 2607ec681f3Smrg case PIPE_CAP_MULTI_DRAW_INDIRECT: 2617ec681f3Smrg case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: 2627ec681f3Smrg return sscreen->has_draw_indirect_multi; 2637ec681f3Smrg 2647ec681f3Smrg case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: 2657ec681f3Smrg return 30; 2667ec681f3Smrg 2677ec681f3Smrg case PIPE_CAP_MAX_VARYINGS: 2687ec681f3Smrg return 32; 2697ec681f3Smrg 2707ec681f3Smrg case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: 2717ec681f3Smrg return sscreen->info.chip_class <= GFX8 ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0; 2727ec681f3Smrg 2737ec681f3Smrg /* Stream output. */ 2747ec681f3Smrg case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: 2757ec681f3Smrg case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: 2767ec681f3Smrg return 32 * 4; 2777ec681f3Smrg 2787ec681f3Smrg /* Geometry shader output. */ 2797ec681f3Smrg case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: 2807ec681f3Smrg /* gfx9 has to report 256 to make piglit/gs-max-output pass. 2817ec681f3Smrg * gfx8 and earlier can do 1024. 2827ec681f3Smrg */ 2837ec681f3Smrg return 256; 2847ec681f3Smrg case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: 2857ec681f3Smrg return 4095; 2867ec681f3Smrg case PIPE_CAP_MAX_GS_INVOCATIONS: 2877ec681f3Smrg /* Even though the hw supports more, we officially wanna expose only 32. */ 2887ec681f3Smrg return 32; 2897ec681f3Smrg 2907ec681f3Smrg case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: 2917ec681f3Smrg return 2048; 2927ec681f3Smrg 2937ec681f3Smrg /* Texturing. */ 2947ec681f3Smrg case PIPE_CAP_MAX_TEXTURE_2D_SIZE: 2957ec681f3Smrg return 16384; 2967ec681f3Smrg case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: 2977ec681f3Smrg if (!sscreen->info.has_3d_cube_border_color_mipmap) 2987ec681f3Smrg return 0; 2997ec681f3Smrg return 15; /* 16384 */ 3007ec681f3Smrg case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: 3017ec681f3Smrg if (!sscreen->info.has_3d_cube_border_color_mipmap) 3027ec681f3Smrg return 0; 3037ec681f3Smrg if (sscreen->info.chip_class >= GFX10) 3047ec681f3Smrg return 14; 3057ec681f3Smrg /* textures support 8192, but layered rendering supports 2048 */ 3067ec681f3Smrg return 12; 3077ec681f3Smrg case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: 3087ec681f3Smrg if (sscreen->info.chip_class >= GFX10) 3097ec681f3Smrg return 8192; 3107ec681f3Smrg /* textures support 8192, but layered rendering supports 2048 */ 3117ec681f3Smrg return 2048; 3127ec681f3Smrg 3137ec681f3Smrg /* Viewports and render targets. */ 3147ec681f3Smrg case PIPE_CAP_MAX_VIEWPORTS: 3157ec681f3Smrg return SI_MAX_VIEWPORTS; 3167ec681f3Smrg case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: 3177ec681f3Smrg case PIPE_CAP_RASTERIZER_SUBPIXEL_BITS: 3187ec681f3Smrg case PIPE_CAP_MAX_RENDER_TARGETS: 3197ec681f3Smrg return 8; 3207ec681f3Smrg case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS: 3217ec681f3Smrg return sscreen->info.has_eqaa_surface_allocator ? 2 : 0; 3227ec681f3Smrg 3237ec681f3Smrg case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: 3247ec681f3Smrg case PIPE_CAP_MIN_TEXEL_OFFSET: 3257ec681f3Smrg return -32; 3267ec681f3Smrg 3277ec681f3Smrg case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: 3287ec681f3Smrg case PIPE_CAP_MAX_TEXEL_OFFSET: 3297ec681f3Smrg return 31; 3307ec681f3Smrg 3317ec681f3Smrg case PIPE_CAP_ENDIANNESS: 3327ec681f3Smrg return PIPE_ENDIAN_LITTLE; 3337ec681f3Smrg 3347ec681f3Smrg case PIPE_CAP_VENDOR_ID: 3357ec681f3Smrg return ATI_VENDOR_ID; 3367ec681f3Smrg case PIPE_CAP_DEVICE_ID: 3377ec681f3Smrg return sscreen->info.pci_id; 3387ec681f3Smrg case PIPE_CAP_VIDEO_MEMORY: 3397ec681f3Smrg return sscreen->info.vram_size >> 20; 3407ec681f3Smrg case PIPE_CAP_PCI_GROUP: 3417ec681f3Smrg return sscreen->info.pci_domain; 3427ec681f3Smrg case PIPE_CAP_PCI_BUS: 3437ec681f3Smrg return sscreen->info.pci_bus; 3447ec681f3Smrg case PIPE_CAP_PCI_DEVICE: 3457ec681f3Smrg return sscreen->info.pci_dev; 3467ec681f3Smrg case PIPE_CAP_PCI_FUNCTION: 3477ec681f3Smrg return sscreen->info.pci_func; 3487ec681f3Smrg 3497ec681f3Smrg default: 3507ec681f3Smrg return u_pipe_screen_get_param_defaults(pscreen, param); 3517ec681f3Smrg } 35201e04c3fSmrg} 35301e04c3fSmrg 3547ec681f3Smrgstatic float si_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) 35501e04c3fSmrg{ 3567ec681f3Smrg switch (param) { 3577ec681f3Smrg case PIPE_CAPF_MAX_LINE_WIDTH: 3587ec681f3Smrg case PIPE_CAPF_MAX_LINE_WIDTH_AA: 3597ec681f3Smrg /* This depends on the quant mode, though the precise interactions 3607ec681f3Smrg * are unknown. */ 3617ec681f3Smrg return 2048; 3627ec681f3Smrg case PIPE_CAPF_MAX_POINT_WIDTH: 3637ec681f3Smrg case PIPE_CAPF_MAX_POINT_WIDTH_AA: 3647ec681f3Smrg return SI_MAX_POINT_SIZE; 3657ec681f3Smrg case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: 3667ec681f3Smrg return 16.0f; 3677ec681f3Smrg case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: 3687ec681f3Smrg return 16.0f; 3697ec681f3Smrg case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: 3707ec681f3Smrg case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: 3717ec681f3Smrg case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: 3727ec681f3Smrg return 0.0f; 3737ec681f3Smrg } 3747ec681f3Smrg return 0.0f; 37501e04c3fSmrg} 37601e04c3fSmrg 3777ec681f3Smrgstatic int si_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader, 3787ec681f3Smrg enum pipe_shader_cap param) 37901e04c3fSmrg{ 3807ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)pscreen; 3817ec681f3Smrg 3827ec681f3Smrg switch (param) { 3837ec681f3Smrg /* Shader limits. */ 3847ec681f3Smrg case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: 3857ec681f3Smrg case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: 3867ec681f3Smrg case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: 3877ec681f3Smrg case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: 3887ec681f3Smrg case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: 3897ec681f3Smrg return 16384; 3907ec681f3Smrg case PIPE_SHADER_CAP_MAX_INPUTS: 3917ec681f3Smrg return shader == PIPE_SHADER_VERTEX ? SI_MAX_ATTRIBS : 32; 3927ec681f3Smrg case PIPE_SHADER_CAP_MAX_OUTPUTS: 3937ec681f3Smrg return shader == PIPE_SHADER_FRAGMENT ? 8 : 32; 3947ec681f3Smrg case PIPE_SHADER_CAP_MAX_TEMPS: 3957ec681f3Smrg return 256; /* Max native temporaries. */ 3967ec681f3Smrg case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: 3977ec681f3Smrg return 1 << 26; /* 64 MB */ 3987ec681f3Smrg case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: 3997ec681f3Smrg return SI_NUM_CONST_BUFFERS; 4007ec681f3Smrg case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: 4017ec681f3Smrg case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: 4027ec681f3Smrg return SI_NUM_SAMPLERS; 4037ec681f3Smrg case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: 4047ec681f3Smrg return SI_NUM_SHADER_BUFFERS; 4057ec681f3Smrg case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: 4067ec681f3Smrg return SI_NUM_IMAGES; 4077ec681f3Smrg case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: 4087ec681f3Smrg return 0; 4097ec681f3Smrg case PIPE_SHADER_CAP_PREFERRED_IR: 4107ec681f3Smrg return PIPE_SHADER_IR_NIR; 4117ec681f3Smrg case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: 4127ec681f3Smrg return 4; 4137ec681f3Smrg 4147ec681f3Smrg case PIPE_SHADER_CAP_SUPPORTED_IRS: 4157ec681f3Smrg if (shader == PIPE_SHADER_COMPUTE) { 4167ec681f3Smrg return (1 << PIPE_SHADER_IR_NATIVE) | 4177ec681f3Smrg (sscreen->info.has_indirect_compute_dispatch ? 4187ec681f3Smrg (1 << PIPE_SHADER_IR_NIR) | 4197ec681f3Smrg (1 << PIPE_SHADER_IR_TGSI) : 0); 4207ec681f3Smrg } 4217ec681f3Smrg return (1 << PIPE_SHADER_IR_TGSI) | 4227ec681f3Smrg (1 << PIPE_SHADER_IR_NIR); 4237ec681f3Smrg 4247ec681f3Smrg /* Supported boolean features. */ 4257ec681f3Smrg case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: 4267ec681f3Smrg case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: 4277ec681f3Smrg case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: 4287ec681f3Smrg case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: 4297ec681f3Smrg case PIPE_SHADER_CAP_INTEGERS: 4307ec681f3Smrg case PIPE_SHADER_CAP_INT64_ATOMICS: 4317ec681f3Smrg case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: 4327ec681f3Smrg case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: 4337ec681f3Smrg case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: 4347ec681f3Smrg case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: 4357ec681f3Smrg case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: 4367ec681f3Smrg case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: 4377ec681f3Smrg case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: /* lowered in finalize_nir */ 4387ec681f3Smrg case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: /* lowered in finalize_nir */ 4397ec681f3Smrg return 1; 4407ec681f3Smrg 4417ec681f3Smrg case PIPE_SHADER_CAP_FP16: 4427ec681f3Smrg case PIPE_SHADER_CAP_FP16_DERIVATIVES: 4437ec681f3Smrg case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS: 4447ec681f3Smrg return sscreen->options.fp16; 4457ec681f3Smrg 4467ec681f3Smrg case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: 4477ec681f3Smrg /* We need f16c for fast FP16 conversions in glUniform. */ 4487ec681f3Smrg return sscreen->options.fp16 && util_get_cpu_caps()->has_f16c; 4497ec681f3Smrg 4507ec681f3Smrg /* Unsupported boolean features. */ 4517ec681f3Smrg case PIPE_SHADER_CAP_INT16: 4527ec681f3Smrg case PIPE_SHADER_CAP_SUBROUTINES: 4537ec681f3Smrg case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: 4547ec681f3Smrg case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: 4557ec681f3Smrg return 0; 4567ec681f3Smrg } 4577ec681f3Smrg return 0; 45801e04c3fSmrg} 45901e04c3fSmrg 4607ec681f3Smrgstatic const void *si_get_compiler_options(struct pipe_screen *screen, enum pipe_shader_ir ir, 4617ec681f3Smrg enum pipe_shader_type shader) 46201e04c3fSmrg{ 4637ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 4647ec681f3Smrg 4657ec681f3Smrg assert(ir == PIPE_SHADER_IR_NIR); 4667ec681f3Smrg return &sscreen->nir_options; 46701e04c3fSmrg} 46801e04c3fSmrg 46901e04c3fSmrgstatic void si_get_driver_uuid(struct pipe_screen *pscreen, char *uuid) 47001e04c3fSmrg{ 4717ec681f3Smrg ac_compute_driver_uuid(uuid, PIPE_UUID_SIZE); 47201e04c3fSmrg} 47301e04c3fSmrg 47401e04c3fSmrgstatic void si_get_device_uuid(struct pipe_screen *pscreen, char *uuid) 47501e04c3fSmrg{ 4767ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)pscreen; 47701e04c3fSmrg 4787ec681f3Smrg ac_compute_device_uuid(&sscreen->info, uuid, PIPE_UUID_SIZE); 47901e04c3fSmrg} 48001e04c3fSmrg 4817ec681f3Smrgstatic const char *si_get_name(struct pipe_screen *pscreen) 48201e04c3fSmrg{ 4837ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)pscreen; 48401e04c3fSmrg 4857ec681f3Smrg return sscreen->renderer_string; 48601e04c3fSmrg} 48701e04c3fSmrg 4887ec681f3Smrgstatic int si_get_video_param_no_video_hw(struct pipe_screen *screen, enum pipe_video_profile profile, 4897ec681f3Smrg enum pipe_video_entrypoint entrypoint, 4907ec681f3Smrg enum pipe_video_cap param) 49101e04c3fSmrg{ 4927ec681f3Smrg switch (param) { 4937ec681f3Smrg case PIPE_VIDEO_CAP_SUPPORTED: 4947ec681f3Smrg return vl_profile_supported(screen, profile, entrypoint); 4957ec681f3Smrg case PIPE_VIDEO_CAP_NPOT_TEXTURES: 4967ec681f3Smrg return 1; 4977ec681f3Smrg case PIPE_VIDEO_CAP_MAX_WIDTH: 4987ec681f3Smrg case PIPE_VIDEO_CAP_MAX_HEIGHT: 4997ec681f3Smrg return vl_video_buffer_max_size(screen); 5007ec681f3Smrg case PIPE_VIDEO_CAP_PREFERED_FORMAT: 5017ec681f3Smrg return PIPE_FORMAT_NV12; 5027ec681f3Smrg case PIPE_VIDEO_CAP_PREFERS_INTERLACED: 5037ec681f3Smrg return false; 5047ec681f3Smrg case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: 5057ec681f3Smrg return false; 5067ec681f3Smrg case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: 5077ec681f3Smrg return true; 5087ec681f3Smrg case PIPE_VIDEO_CAP_MAX_LEVEL: 5097ec681f3Smrg return vl_level_supported(screen, profile); 5107ec681f3Smrg default: 5117ec681f3Smrg return 0; 5127ec681f3Smrg } 51301e04c3fSmrg} 51401e04c3fSmrg 5157ec681f3Smrgstatic int si_get_video_param(struct pipe_screen *screen, enum pipe_video_profile profile, 5167ec681f3Smrg enum pipe_video_entrypoint entrypoint, enum pipe_video_cap param) 51701e04c3fSmrg{ 5187ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 5197ec681f3Smrg enum pipe_video_format codec = u_reduce_video_profile(profile); 5207ec681f3Smrg 5217ec681f3Smrg if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) { 5227ec681f3Smrg if (!(sscreen->info.has_video_hw.vce_encode || 5237ec681f3Smrg sscreen->info.has_video_hw.uvd_encode || 5247ec681f3Smrg sscreen->info.has_video_hw.vcn_encode)) 5257ec681f3Smrg return 0; 5267ec681f3Smrg 5277ec681f3Smrg switch (param) { 5287ec681f3Smrg case PIPE_VIDEO_CAP_SUPPORTED: 5297ec681f3Smrg return ( 5307ec681f3Smrg (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC && 5317ec681f3Smrg (sscreen->info.family >= CHIP_RAVEN || si_vce_is_fw_version_supported(sscreen))) || 5327ec681f3Smrg (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN && 5337ec681f3Smrg (sscreen->info.family >= CHIP_RAVEN || si_radeon_uvd_enc_supported(sscreen))) || 5347ec681f3Smrg (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10 && sscreen->info.family >= CHIP_RENOIR)); 5357ec681f3Smrg case PIPE_VIDEO_CAP_NPOT_TEXTURES: 5367ec681f3Smrg return 1; 5377ec681f3Smrg case PIPE_VIDEO_CAP_MAX_WIDTH: 5387ec681f3Smrg if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && 5397ec681f3Smrg sscreen->info.enc_caps.codec_info[codec - 1].valid) 5407ec681f3Smrg return sscreen->info.enc_caps.codec_info[codec - 1].max_width; 5417ec681f3Smrg else 5427ec681f3Smrg return (sscreen->info.family < CHIP_TONGA) ? 2048 : 4096; 5437ec681f3Smrg case PIPE_VIDEO_CAP_MAX_HEIGHT: 5447ec681f3Smrg if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && 5457ec681f3Smrg sscreen->info.enc_caps.codec_info[codec - 1].valid) 5467ec681f3Smrg return sscreen->info.enc_caps.codec_info[codec - 1].max_height; 5477ec681f3Smrg else 5487ec681f3Smrg return (sscreen->info.family < CHIP_TONGA) ? 1152 : 2304; 5497ec681f3Smrg case PIPE_VIDEO_CAP_PREFERED_FORMAT: 5507ec681f3Smrg if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) 5517ec681f3Smrg return PIPE_FORMAT_P010; 5527ec681f3Smrg else 5537ec681f3Smrg return PIPE_FORMAT_NV12; 5547ec681f3Smrg case PIPE_VIDEO_CAP_PREFERS_INTERLACED: 5557ec681f3Smrg return false; 5567ec681f3Smrg case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: 5577ec681f3Smrg return false; 5587ec681f3Smrg case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: 5597ec681f3Smrg return true; 5607ec681f3Smrg case PIPE_VIDEO_CAP_STACKED_FRAMES: 5617ec681f3Smrg return (sscreen->info.family < CHIP_TONGA) ? 1 : 2; 5627ec681f3Smrg case PIPE_VIDEO_CAP_MAX_TEMPORAL_LAYERS: 5637ec681f3Smrg if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC && 5647ec681f3Smrg sscreen->info.family >= CHIP_RAVEN) 5657ec681f3Smrg return 4; 5667ec681f3Smrg else 5677ec681f3Smrg return 0; 5687ec681f3Smrg default: 5697ec681f3Smrg return 0; 5707ec681f3Smrg } 5717ec681f3Smrg } 5727ec681f3Smrg 5737ec681f3Smrg switch (param) { 5747ec681f3Smrg case PIPE_VIDEO_CAP_SUPPORTED: 5757ec681f3Smrg if (codec < PIPE_VIDEO_FORMAT_MPEG4_AVC && 5767ec681f3Smrg sscreen->info.family >= CHIP_BEIGE_GOBY) 5777ec681f3Smrg return false; 5787ec681f3Smrg if (codec != PIPE_VIDEO_FORMAT_JPEG && 5797ec681f3Smrg !(sscreen->info.has_video_hw.uvd_decode || 5807ec681f3Smrg sscreen->info.has_video_hw.vcn_decode)) 5817ec681f3Smrg return false; 5827ec681f3Smrg 5837ec681f3Smrg switch (codec) { 5847ec681f3Smrg case PIPE_VIDEO_FORMAT_MPEG12: 5857ec681f3Smrg return profile != PIPE_VIDEO_PROFILE_MPEG1; 5867ec681f3Smrg case PIPE_VIDEO_FORMAT_MPEG4: 5877ec681f3Smrg return 1; 5887ec681f3Smrg case PIPE_VIDEO_FORMAT_MPEG4_AVC: 5897ec681f3Smrg if ((sscreen->info.family == CHIP_POLARIS10 || sscreen->info.family == CHIP_POLARIS11) && 5907ec681f3Smrg sscreen->info.uvd_fw_version < UVD_FW_1_66_16) { 5917ec681f3Smrg RVID_ERR("POLARIS10/11 firmware version need to be updated.\n"); 5927ec681f3Smrg return false; 5937ec681f3Smrg } 5947ec681f3Smrg return true; 5957ec681f3Smrg case PIPE_VIDEO_FORMAT_VC1: 5967ec681f3Smrg return true; 5977ec681f3Smrg case PIPE_VIDEO_FORMAT_HEVC: 5987ec681f3Smrg /* Carrizo only supports HEVC Main */ 5997ec681f3Smrg if (sscreen->info.family >= CHIP_STONEY) 6007ec681f3Smrg return (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN || 6017ec681f3Smrg profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10); 6027ec681f3Smrg else if (sscreen->info.family >= CHIP_CARRIZO) 6037ec681f3Smrg return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN; 6047ec681f3Smrg return false; 6057ec681f3Smrg case PIPE_VIDEO_FORMAT_JPEG: 6067ec681f3Smrg if (sscreen->info.family >= CHIP_RAVEN) { 6077ec681f3Smrg if (!sscreen->info.has_video_hw.jpeg_decode) 6087ec681f3Smrg return false; 6097ec681f3Smrg else 6107ec681f3Smrg return true; 6117ec681f3Smrg } 6127ec681f3Smrg if (sscreen->info.family < CHIP_CARRIZO || sscreen->info.family >= CHIP_VEGA10) 6137ec681f3Smrg return false; 6147ec681f3Smrg if (!(sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 19)) { 6157ec681f3Smrg RVID_ERR("No MJPEG support for the kernel version\n"); 6167ec681f3Smrg return false; 6177ec681f3Smrg } 6187ec681f3Smrg return true; 6197ec681f3Smrg case PIPE_VIDEO_FORMAT_VP9: 6207ec681f3Smrg if (sscreen->info.family < CHIP_RAVEN) 6217ec681f3Smrg return false; 6227ec681f3Smrg return true; 6237ec681f3Smrg case PIPE_VIDEO_FORMAT_AV1: 6247ec681f3Smrg if (sscreen->info.family < CHIP_SIENNA_CICHLID) 6257ec681f3Smrg return false; 6267ec681f3Smrg return true; 6277ec681f3Smrg default: 6287ec681f3Smrg return false; 6297ec681f3Smrg } 6307ec681f3Smrg case PIPE_VIDEO_CAP_NPOT_TEXTURES: 6317ec681f3Smrg return 1; 6327ec681f3Smrg case PIPE_VIDEO_CAP_MAX_WIDTH: 6337ec681f3Smrg if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && 6347ec681f3Smrg sscreen->info.dec_caps.codec_info[codec - 1].valid) { 6357ec681f3Smrg return sscreen->info.dec_caps.codec_info[codec - 1].max_width; 6367ec681f3Smrg } else { 6377ec681f3Smrg switch (codec) { 6387ec681f3Smrg case PIPE_VIDEO_FORMAT_HEVC: 6397ec681f3Smrg case PIPE_VIDEO_FORMAT_VP9: 6407ec681f3Smrg case PIPE_VIDEO_FORMAT_AV1: 6417ec681f3Smrg return (sscreen->info.family < CHIP_RENOIR) ? 6427ec681f3Smrg ((sscreen->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192; 6437ec681f3Smrg default: 6447ec681f3Smrg return (sscreen->info.family < CHIP_TONGA) ? 2048 : 4096; 6457ec681f3Smrg } 6467ec681f3Smrg } 6477ec681f3Smrg case PIPE_VIDEO_CAP_MAX_HEIGHT: 6487ec681f3Smrg if (codec != PIPE_VIDEO_FORMAT_UNKNOWN && 6497ec681f3Smrg sscreen->info.dec_caps.codec_info[codec - 1].valid) { 6507ec681f3Smrg return sscreen->info.dec_caps.codec_info[codec - 1].max_height; 6517ec681f3Smrg } else { 6527ec681f3Smrg switch (codec) { 6537ec681f3Smrg case PIPE_VIDEO_FORMAT_HEVC: 6547ec681f3Smrg case PIPE_VIDEO_FORMAT_VP9: 6557ec681f3Smrg case PIPE_VIDEO_FORMAT_AV1: 6567ec681f3Smrg return (sscreen->info.family < CHIP_RENOIR) ? 6577ec681f3Smrg ((sscreen->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352; 6587ec681f3Smrg default: 6597ec681f3Smrg return (sscreen->info.family < CHIP_TONGA) ? 1152 : 4096; 6607ec681f3Smrg } 6617ec681f3Smrg } 6627ec681f3Smrg case PIPE_VIDEO_CAP_PREFERED_FORMAT: 6637ec681f3Smrg if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) 6647ec681f3Smrg return PIPE_FORMAT_P010; 6657ec681f3Smrg else if (profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) 6667ec681f3Smrg return PIPE_FORMAT_P010; 6677ec681f3Smrg else 6687ec681f3Smrg return PIPE_FORMAT_NV12; 6697ec681f3Smrg 6707ec681f3Smrg case PIPE_VIDEO_CAP_PREFERS_INTERLACED: 6717ec681f3Smrg case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: { 6727ec681f3Smrg enum pipe_video_format format = u_reduce_video_profile(profile); 6737ec681f3Smrg 6747ec681f3Smrg if (format >= PIPE_VIDEO_FORMAT_HEVC) 6757ec681f3Smrg return false; 6767ec681f3Smrg return true; 6777ec681f3Smrg } 6787ec681f3Smrg case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: 6797ec681f3Smrg return true; 6807ec681f3Smrg case PIPE_VIDEO_CAP_MAX_LEVEL: 6817ec681f3Smrg if ((profile == PIPE_VIDEO_PROFILE_MPEG2_SIMPLE || 6827ec681f3Smrg profile == PIPE_VIDEO_PROFILE_MPEG2_MAIN || 6837ec681f3Smrg profile == PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE || 6847ec681f3Smrg profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED) && 6857ec681f3Smrg sscreen->info.dec_caps.codec_info[codec - 1].valid) { 6867ec681f3Smrg return sscreen->info.dec_caps.codec_info[codec - 1].max_level; 6877ec681f3Smrg } else { 6887ec681f3Smrg switch (profile) { 6897ec681f3Smrg case PIPE_VIDEO_PROFILE_MPEG1: 6907ec681f3Smrg return 0; 6917ec681f3Smrg case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE: 6927ec681f3Smrg case PIPE_VIDEO_PROFILE_MPEG2_MAIN: 6937ec681f3Smrg return 3; 6947ec681f3Smrg case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE: 6957ec681f3Smrg return 3; 6967ec681f3Smrg case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE: 6977ec681f3Smrg return 5; 6987ec681f3Smrg case PIPE_VIDEO_PROFILE_VC1_SIMPLE: 6997ec681f3Smrg return 1; 7007ec681f3Smrg case PIPE_VIDEO_PROFILE_VC1_MAIN: 7017ec681f3Smrg return 2; 7027ec681f3Smrg case PIPE_VIDEO_PROFILE_VC1_ADVANCED: 7037ec681f3Smrg return 4; 7047ec681f3Smrg case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: 7057ec681f3Smrg case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: 7067ec681f3Smrg case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: 7077ec681f3Smrg return (sscreen->info.family < CHIP_TONGA) ? 41 : 52; 7087ec681f3Smrg case PIPE_VIDEO_PROFILE_HEVC_MAIN: 7097ec681f3Smrg case PIPE_VIDEO_PROFILE_HEVC_MAIN_10: 7107ec681f3Smrg return 186; 7117ec681f3Smrg default: 7127ec681f3Smrg return 0; 7137ec681f3Smrg } 7147ec681f3Smrg } 7157ec681f3Smrg default: 7167ec681f3Smrg return 0; 7177ec681f3Smrg } 71801e04c3fSmrg} 71901e04c3fSmrg 7207ec681f3Smrgstatic bool si_vid_is_format_supported(struct pipe_screen *screen, enum pipe_format format, 7217ec681f3Smrg enum pipe_video_profile profile, 7227ec681f3Smrg enum pipe_video_entrypoint entrypoint) 72301e04c3fSmrg{ 7247ec681f3Smrg /* HEVC 10 bit decoding should use P010 instead of NV12 if possible */ 7257ec681f3Smrg if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) 7267ec681f3Smrg return (format == PIPE_FORMAT_NV12) || (format == PIPE_FORMAT_P010) || 7277ec681f3Smrg (format == PIPE_FORMAT_P016); 72801e04c3fSmrg 7297ec681f3Smrg /* Vp9 profile 2 supports 10 bit decoding using P016 */ 7307ec681f3Smrg if (profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) 7317ec681f3Smrg return (format == PIPE_FORMAT_P010) || (format == PIPE_FORMAT_P016); 73201e04c3fSmrg 7337ec681f3Smrg /* we can only handle this one with UVD */ 7347ec681f3Smrg if (profile != PIPE_VIDEO_PROFILE_UNKNOWN) 7357ec681f3Smrg return format == PIPE_FORMAT_NV12; 7367ec681f3Smrg 7377ec681f3Smrg return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint); 73801e04c3fSmrg} 73901e04c3fSmrg 7407ec681f3Smrgstatic unsigned get_max_threads_per_block(struct si_screen *screen, enum pipe_shader_ir ir_type) 74101e04c3fSmrg{ 7427ec681f3Smrg if (ir_type == PIPE_SHADER_IR_NATIVE) 7437ec681f3Smrg return 256; 74401e04c3fSmrg 7457ec681f3Smrg /* LLVM only supports 1024 threads per block. */ 7467ec681f3Smrg return 1024; 74701e04c3fSmrg} 74801e04c3fSmrg 7497ec681f3Smrgstatic int si_get_compute_param(struct pipe_screen *screen, enum pipe_shader_ir ir_type, 7507ec681f3Smrg enum pipe_compute_cap param, void *ret) 75101e04c3fSmrg{ 7527ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 7537ec681f3Smrg 7547ec681f3Smrg // TODO: select these params by asic 7557ec681f3Smrg switch (param) { 7567ec681f3Smrg case PIPE_COMPUTE_CAP_IR_TARGET: { 7577ec681f3Smrg const char *gpu, *triple; 7587ec681f3Smrg 7597ec681f3Smrg triple = "amdgcn-mesa-mesa3d"; 7607ec681f3Smrg gpu = ac_get_llvm_processor_name(sscreen->info.family); 7617ec681f3Smrg if (ret) { 7627ec681f3Smrg sprintf(ret, "%s-%s", gpu, triple); 7637ec681f3Smrg } 7647ec681f3Smrg /* +2 for dash and terminating NIL byte */ 7657ec681f3Smrg return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); 7667ec681f3Smrg } 7677ec681f3Smrg case PIPE_COMPUTE_CAP_GRID_DIMENSION: 7687ec681f3Smrg if (ret) { 7697ec681f3Smrg uint64_t *grid_dimension = ret; 7707ec681f3Smrg grid_dimension[0] = 3; 7717ec681f3Smrg } 7727ec681f3Smrg return 1 * sizeof(uint64_t); 7737ec681f3Smrg 7747ec681f3Smrg case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: 7757ec681f3Smrg if (ret) { 7767ec681f3Smrg uint64_t *grid_size = ret; 7777ec681f3Smrg grid_size[0] = 65535; 7787ec681f3Smrg grid_size[1] = 65535; 7797ec681f3Smrg grid_size[2] = 65535; 7807ec681f3Smrg } 7817ec681f3Smrg return 3 * sizeof(uint64_t); 7827ec681f3Smrg 7837ec681f3Smrg case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: 7847ec681f3Smrg if (ret) { 7857ec681f3Smrg uint64_t *block_size = ret; 7867ec681f3Smrg unsigned threads_per_block = get_max_threads_per_block(sscreen, ir_type); 7877ec681f3Smrg block_size[0] = threads_per_block; 7887ec681f3Smrg block_size[1] = threads_per_block; 7897ec681f3Smrg block_size[2] = threads_per_block; 7907ec681f3Smrg } 7917ec681f3Smrg return 3 * sizeof(uint64_t); 7927ec681f3Smrg 7937ec681f3Smrg case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: 7947ec681f3Smrg if (ret) { 7957ec681f3Smrg uint64_t *max_threads_per_block = ret; 7967ec681f3Smrg *max_threads_per_block = get_max_threads_per_block(sscreen, ir_type); 7977ec681f3Smrg } 7987ec681f3Smrg return sizeof(uint64_t); 7997ec681f3Smrg case PIPE_COMPUTE_CAP_ADDRESS_BITS: 8007ec681f3Smrg if (ret) { 8017ec681f3Smrg uint32_t *address_bits = ret; 8027ec681f3Smrg address_bits[0] = 64; 8037ec681f3Smrg } 8047ec681f3Smrg return 1 * sizeof(uint32_t); 8057ec681f3Smrg 8067ec681f3Smrg case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: 8077ec681f3Smrg if (ret) { 8087ec681f3Smrg uint64_t *max_global_size = ret; 8097ec681f3Smrg uint64_t max_mem_alloc_size; 8107ec681f3Smrg 8117ec681f3Smrg si_get_compute_param(screen, ir_type, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, 8127ec681f3Smrg &max_mem_alloc_size); 8137ec681f3Smrg 8147ec681f3Smrg /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least 8157ec681f3Smrg * 1/4 of the MAX_GLOBAL_SIZE. Since the 8167ec681f3Smrg * MAX_MEM_ALLOC_SIZE is fixed for older kernels, 8177ec681f3Smrg * make sure we never report more than 8187ec681f3Smrg * 4 * MAX_MEM_ALLOC_SIZE. 8197ec681f3Smrg */ 8207ec681f3Smrg *max_global_size = 8217ec681f3Smrg MIN2(4 * max_mem_alloc_size, MAX2(sscreen->info.gart_size, sscreen->info.vram_size)); 8227ec681f3Smrg } 8237ec681f3Smrg return sizeof(uint64_t); 8247ec681f3Smrg 8257ec681f3Smrg case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: 8267ec681f3Smrg if (ret) { 8277ec681f3Smrg uint64_t *max_local_size = ret; 8287ec681f3Smrg /* Value reported by the closed source driver. */ 8297ec681f3Smrg *max_local_size = 32768; 8307ec681f3Smrg } 8317ec681f3Smrg return sizeof(uint64_t); 8327ec681f3Smrg 8337ec681f3Smrg case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: 8347ec681f3Smrg if (ret) { 8357ec681f3Smrg uint64_t *max_input_size = ret; 8367ec681f3Smrg /* Value reported by the closed source driver. */ 8377ec681f3Smrg *max_input_size = 1024; 8387ec681f3Smrg } 8397ec681f3Smrg return sizeof(uint64_t); 8407ec681f3Smrg 8417ec681f3Smrg case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: 8427ec681f3Smrg if (ret) { 8437ec681f3Smrg uint64_t *max_mem_alloc_size = ret; 8447ec681f3Smrg 8457ec681f3Smrg *max_mem_alloc_size = sscreen->info.max_alloc_size; 8467ec681f3Smrg } 8477ec681f3Smrg return sizeof(uint64_t); 8487ec681f3Smrg 8497ec681f3Smrg case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: 8507ec681f3Smrg if (ret) { 8517ec681f3Smrg uint32_t *max_clock_frequency = ret; 8527ec681f3Smrg *max_clock_frequency = sscreen->info.max_shader_clock; 8537ec681f3Smrg } 8547ec681f3Smrg return sizeof(uint32_t); 8557ec681f3Smrg 8567ec681f3Smrg case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: 8577ec681f3Smrg if (ret) { 8587ec681f3Smrg uint32_t *max_compute_units = ret; 8597ec681f3Smrg *max_compute_units = sscreen->info.num_good_compute_units; 8607ec681f3Smrg } 8617ec681f3Smrg return sizeof(uint32_t); 8627ec681f3Smrg 8637ec681f3Smrg case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: 8647ec681f3Smrg if (ret) { 8657ec681f3Smrg uint32_t *images_supported = ret; 8667ec681f3Smrg *images_supported = 0; 8677ec681f3Smrg } 8687ec681f3Smrg return sizeof(uint32_t); 8697ec681f3Smrg case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: 8707ec681f3Smrg break; /* unused */ 8717ec681f3Smrg case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: 8727ec681f3Smrg if (ret) { 8737ec681f3Smrg uint32_t *subgroup_size = ret; 8747ec681f3Smrg *subgroup_size = sscreen->compute_wave_size; 8757ec681f3Smrg } 8767ec681f3Smrg return sizeof(uint32_t); 8777ec681f3Smrg case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: 8787ec681f3Smrg if (ret) { 8797ec681f3Smrg uint64_t *max_variable_threads_per_block = ret; 8807ec681f3Smrg if (ir_type == PIPE_SHADER_IR_NATIVE) 8817ec681f3Smrg *max_variable_threads_per_block = 0; 8827ec681f3Smrg else 8837ec681f3Smrg *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK; 8847ec681f3Smrg } 8857ec681f3Smrg return sizeof(uint64_t); 8867ec681f3Smrg } 8877ec681f3Smrg 8887ec681f3Smrg fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); 8897ec681f3Smrg return 0; 89001e04c3fSmrg} 89101e04c3fSmrg 89201e04c3fSmrgstatic uint64_t si_get_timestamp(struct pipe_screen *screen) 89301e04c3fSmrg{ 8947ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 89501e04c3fSmrg 8967ec681f3Smrg return 1000000 * sscreen->ws->query_value(sscreen->ws, RADEON_TIMESTAMP) / 8977ec681f3Smrg sscreen->info.clock_crystal_freq; 89801e04c3fSmrg} 89901e04c3fSmrg 9007ec681f3Smrgstatic void si_query_memory_info(struct pipe_screen *screen, struct pipe_memory_info *info) 90101e04c3fSmrg{ 9027ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)screen; 9037ec681f3Smrg struct radeon_winsys *ws = sscreen->ws; 9047ec681f3Smrg unsigned vram_usage, gtt_usage; 9057ec681f3Smrg 9067ec681f3Smrg info->total_device_memory = sscreen->info.vram_size_kb; 9077ec681f3Smrg info->total_staging_memory = sscreen->info.gart_size_kb; 9087ec681f3Smrg 9097ec681f3Smrg /* The real TTM memory usage is somewhat random, because: 9107ec681f3Smrg * 9117ec681f3Smrg * 1) TTM delays freeing memory, because it can only free it after 9127ec681f3Smrg * fences expire. 9137ec681f3Smrg * 9147ec681f3Smrg * 2) The memory usage can be really low if big VRAM evictions are 9157ec681f3Smrg * taking place, but the real usage is well above the size of VRAM. 9167ec681f3Smrg * 9177ec681f3Smrg * Instead, return statistics of this process. 9187ec681f3Smrg */ 9197ec681f3Smrg vram_usage = ws->query_value(ws, RADEON_VRAM_USAGE) / 1024; 9207ec681f3Smrg gtt_usage = ws->query_value(ws, RADEON_GTT_USAGE) / 1024; 9217ec681f3Smrg 9227ec681f3Smrg info->avail_device_memory = 9237ec681f3Smrg vram_usage <= info->total_device_memory ? info->total_device_memory - vram_usage : 0; 9247ec681f3Smrg info->avail_staging_memory = 9257ec681f3Smrg gtt_usage <= info->total_staging_memory ? info->total_staging_memory - gtt_usage : 0; 9267ec681f3Smrg 9277ec681f3Smrg info->device_memory_evicted = ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; 9287ec681f3Smrg 9297ec681f3Smrg if (sscreen->info.is_amdgpu && sscreen->info.drm_minor >= 4) 9307ec681f3Smrg info->nr_device_memory_evictions = ws->query_value(ws, RADEON_NUM_EVICTIONS); 9317ec681f3Smrg else 9327ec681f3Smrg /* Just return the number of evicted 64KB pages. */ 9337ec681f3Smrg info->nr_device_memory_evictions = info->device_memory_evicted / 64; 93401e04c3fSmrg} 93501e04c3fSmrg 93601e04c3fSmrgstatic struct disk_cache *si_get_disk_shader_cache(struct pipe_screen *pscreen) 93701e04c3fSmrg{ 9387ec681f3Smrg struct si_screen *sscreen = (struct si_screen *)pscreen; 93901e04c3fSmrg 9407ec681f3Smrg return sscreen->disk_shader_cache; 94101e04c3fSmrg} 94201e04c3fSmrg 94301e04c3fSmrgstatic void si_init_renderer_string(struct si_screen *sscreen) 94401e04c3fSmrg{ 9457ec681f3Smrg char first_name[256], second_name[32] = {}, kernel_version[128] = {}; 9467ec681f3Smrg struct utsname uname_data; 9477ec681f3Smrg 9487ec681f3Smrg if (sscreen->info.marketing_name) { 9497ec681f3Smrg snprintf(first_name, sizeof(first_name), "%s", sscreen->info.marketing_name); 9507ec681f3Smrg snprintf(second_name, sizeof(second_name), "%s, ", sscreen->info.name); 9517ec681f3Smrg } else { 9527ec681f3Smrg snprintf(first_name, sizeof(first_name), "AMD %s", sscreen->info.name); 9537ec681f3Smrg } 9547ec681f3Smrg 9557ec681f3Smrg if (uname(&uname_data) == 0) 9567ec681f3Smrg snprintf(kernel_version, sizeof(kernel_version), ", %s", uname_data.release); 9577ec681f3Smrg 9587ec681f3Smrg snprintf(sscreen->renderer_string, sizeof(sscreen->renderer_string), 9597ec681f3Smrg "%s (%sDRM %i.%i.%i%s, LLVM " MESA_LLVM_VERSION_STRING ")", first_name, second_name, 9607ec681f3Smrg sscreen->info.drm_major, sscreen->info.drm_minor, sscreen->info.drm_patchlevel, 9617ec681f3Smrg kernel_version); 96201e04c3fSmrg} 96301e04c3fSmrg 96401e04c3fSmrgvoid si_init_screen_get_functions(struct si_screen *sscreen) 96501e04c3fSmrg{ 9667ec681f3Smrg util_cpu_detect(); 9677ec681f3Smrg 9687ec681f3Smrg sscreen->b.get_name = si_get_name; 9697ec681f3Smrg sscreen->b.get_vendor = si_get_vendor; 9707ec681f3Smrg sscreen->b.get_device_vendor = si_get_device_vendor; 9717ec681f3Smrg sscreen->b.get_param = si_get_param; 9727ec681f3Smrg sscreen->b.get_paramf = si_get_paramf; 9737ec681f3Smrg sscreen->b.get_compute_param = si_get_compute_param; 9747ec681f3Smrg sscreen->b.get_timestamp = si_get_timestamp; 9757ec681f3Smrg sscreen->b.get_shader_param = si_get_shader_param; 9767ec681f3Smrg sscreen->b.get_compiler_options = si_get_compiler_options; 9777ec681f3Smrg sscreen->b.get_device_uuid = si_get_device_uuid; 9787ec681f3Smrg sscreen->b.get_driver_uuid = si_get_driver_uuid; 9797ec681f3Smrg sscreen->b.query_memory_info = si_query_memory_info; 9807ec681f3Smrg sscreen->b.get_disk_shader_cache = si_get_disk_shader_cache; 9817ec681f3Smrg 9827ec681f3Smrg if (sscreen->info.has_video_hw.uvd_decode || sscreen->info.has_video_hw.vcn_decode || 9837ec681f3Smrg sscreen->info.has_video_hw.jpeg_decode || sscreen->info.has_video_hw.vce_encode || 9847ec681f3Smrg sscreen->info.has_video_hw.uvd_encode || sscreen->info.has_video_hw.vcn_encode) { 9857ec681f3Smrg sscreen->b.get_video_param = si_get_video_param; 9867ec681f3Smrg sscreen->b.is_video_format_supported = si_vid_is_format_supported; 9877ec681f3Smrg } else { 9887ec681f3Smrg sscreen->b.get_video_param = si_get_video_param_no_video_hw; 9897ec681f3Smrg sscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; 9907ec681f3Smrg } 9917ec681f3Smrg 9927ec681f3Smrg si_init_renderer_string(sscreen); 9937ec681f3Smrg 9947ec681f3Smrg const struct nir_shader_compiler_options nir_options = { 9957ec681f3Smrg .lower_scmp = true, 9967ec681f3Smrg .lower_flrp16 = true, 9977ec681f3Smrg .lower_flrp32 = true, 9987ec681f3Smrg .lower_flrp64 = true, 9997ec681f3Smrg .lower_fsat = true, 10007ec681f3Smrg .lower_fdiv = true, 10017ec681f3Smrg .lower_bitfield_insert_to_bitfield_select = true, 10027ec681f3Smrg .lower_bitfield_extract = true, 10037ec681f3Smrg /* |---------------------------------- Performance & Availability --------------------------------| 10047ec681f3Smrg * |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY| FMA |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice 10057ec681f3Smrg * Arch | F32,F16,F64 | F32,F16 | F32,F16 |F32,F16,F64 | F32,F16 | F32,F16 |PK_FMAC_F16|F16,F32,F64 10067ec681f3Smrg * ------------------------------------------------------------------------------------------------------------------ 10077ec681f3Smrg * gfx6,7 | 1 , - , - | 1 , - | 1 , - |1/4, - ,1/16| - , - | - , - | - , - | - ,MAD,FMA 10087ec681f3Smrg * gfx8 | 1 , 1 , - | 1 , - | - , - |1/4, 1 ,1/16| - , - | - , - | - , - |MAD,MAD,FMA 10097ec681f3Smrg * gfx9 | 1 ,1|0, - | 1 , - | - , - | 1 , 1 ,1/16| 0|1, - | - , 1 | 2 , - |FMA,MAD,FMA 10107ec681f3Smrg * gfx10 | 1 , - , - | 1 , - | 1 , - | 1 , 1 ,1/16| 1 , 1 | - , - | 2 , 2 |FMA,MAD,FMA 10117ec681f3Smrg * gfx10.3| - , - , - | - , - | - , - | 1 , 1 ,1/16| 1 , 1 | 1 , - | 2 , 2 | all FMA 10127ec681f3Smrg * 10137ec681f3Smrg * Tahiti, Hawaii, Carrizo, Vega20: FMA_F32 is full rate, FMA_F64 is 1/4 10147ec681f3Smrg * gfx9 supports MAD_F16 only on Vega10, Raven, Raven2, Renoir. 10157ec681f3Smrg * gfx9 supports FMAC_F32 only on Vega20, but doesn't support FMAAK and FMAMK. 10167ec681f3Smrg * 10177ec681f3Smrg * gfx8 prefers MAD for F16 because of MAC/MADAK/MADMK. 10187ec681f3Smrg * gfx9 and newer prefer FMA for F16 because of the packed instruction. 10197ec681f3Smrg * gfx10 and older prefer MAD for F32 because of the legacy instruction. 10207ec681f3Smrg */ 10217ec681f3Smrg .lower_ffma16 = sscreen->info.chip_class < GFX9, 10227ec681f3Smrg .lower_ffma32 = sscreen->info.chip_class < GFX10_3, 10237ec681f3Smrg .lower_ffma64 = false, 10247ec681f3Smrg .fuse_ffma16 = sscreen->info.chip_class >= GFX9, 10257ec681f3Smrg .fuse_ffma32 = sscreen->info.chip_class >= GFX10_3, 10267ec681f3Smrg .fuse_ffma64 = true, 10277ec681f3Smrg .lower_fmod = true, 10287ec681f3Smrg .lower_pack_snorm_4x8 = true, 10297ec681f3Smrg .lower_pack_unorm_4x8 = true, 10307ec681f3Smrg .lower_unpack_snorm_2x16 = true, 10317ec681f3Smrg .lower_unpack_snorm_4x8 = true, 10327ec681f3Smrg .lower_unpack_unorm_2x16 = true, 10337ec681f3Smrg .lower_unpack_unorm_4x8 = true, 10347ec681f3Smrg .lower_extract_byte = true, 10357ec681f3Smrg .lower_extract_word = true, 10367ec681f3Smrg .lower_insert_byte = true, 10377ec681f3Smrg .lower_insert_word = true, 10387ec681f3Smrg .lower_rotate = true, 10397ec681f3Smrg .lower_to_scalar = true, 10407ec681f3Smrg .has_dot_4x8 = sscreen->info.has_accelerated_dot_product, 10417ec681f3Smrg .has_dot_2x16 = sscreen->info.has_accelerated_dot_product, 10427ec681f3Smrg .optimize_sample_mask_in = true, 10437ec681f3Smrg .max_unroll_iterations = 32, 10447ec681f3Smrg .max_unroll_iterations_aggressive = 128, 10457ec681f3Smrg .use_interpolated_input_intrinsics = true, 10467ec681f3Smrg .lower_uniforms_to_ubo = true, 10477ec681f3Smrg .support_16bit_alu = sscreen->options.fp16, 10487ec681f3Smrg .vectorize_vec2_16bit = sscreen->options.fp16, 10497ec681f3Smrg .pack_varying_options = 10507ec681f3Smrg nir_pack_varying_interp_mode_none | 10517ec681f3Smrg nir_pack_varying_interp_mode_smooth | 10527ec681f3Smrg nir_pack_varying_interp_mode_noperspective | 10537ec681f3Smrg nir_pack_varying_interp_loc_center | 10547ec681f3Smrg nir_pack_varying_interp_loc_sample | 10557ec681f3Smrg nir_pack_varying_interp_loc_centroid, 10567ec681f3Smrg }; 10577ec681f3Smrg sscreen->nir_options = nir_options; 105801e04c3fSmrg} 1059