freedreno_screen.c revision 7ec681f3
1/* 2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#include "pipe/p_defines.h" 28#include "pipe/p_screen.h" 29#include "pipe/p_state.h" 30 31#include "util/format/u_format.h" 32#include "util/format/u_format_s3tc.h" 33#include "util/u_debug.h" 34#include "util/u_inlines.h" 35#include "util/u_memory.h" 36#include "util/u_screen.h" 37#include "util/u_string.h" 38#include "util/xmlconfig.h" 39 40#include "util/os_time.h" 41 42#include <errno.h> 43#include <stdio.h> 44#include <stdlib.h> 45#include "drm-uapi/drm_fourcc.h" 46#include <sys/sysinfo.h> 47 48#include "freedreno_fence.h" 49#include "freedreno_perfetto.h" 50#include "freedreno_query.h" 51#include "freedreno_resource.h" 52#include "freedreno_screen.h" 53#include "freedreno_util.h" 54 55#include "a2xx/fd2_screen.h" 56#include "a3xx/fd3_screen.h" 57#include "a4xx/fd4_screen.h" 58#include "a5xx/fd5_screen.h" 59#include "a6xx/fd6_screen.h" 60 61/* for fd_get_driver/device_uuid() */ 62#include "common/freedreno_uuid.h" 63 64#include "a2xx/ir2.h" 65#include "ir3/ir3_gallium.h" 66#include "ir3/ir3_nir.h" 67 68/* clang-format off */ 69static const struct debug_named_value fd_debug_options[] = { 70 {"msgs", FD_DBG_MSGS, "Print debug messages"}, 71 {"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly (a2xx only, see IR3_SHADER_DEBUG)"}, 72 {"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"}, 73 {"ddraw", FD_DBG_DDRAW, "Mark all state dirty after draw"}, 74 {"noscis", FD_DBG_NOSCIS, "Disable scissor optimization"}, 75 {"direct", FD_DBG_DIRECT, "Force inline (SS_DIRECT) state loads"}, 76 {"nobypass", FD_DBG_NOBYPASS, "Disable GMEM bypass"}, 77 {"perf", FD_DBG_PERF, "Enable performance warnings"}, 78 {"nobin", FD_DBG_NOBIN, "Disable hw binning"}, 79 {"nogmem", FD_DBG_NOGMEM, "Disable GMEM rendering (bypass only)"}, 80 {"serialc", FD_DBG_SERIALC,"Disable asynchronous shader compile"}, 81 {"shaderdb", FD_DBG_SHADERDB, "Enable shaderdb output"}, 82 {"flush", FD_DBG_FLUSH, "Force flush after every draw"}, 83 {"deqp", FD_DBG_DEQP, "Enable dEQP hacks"}, 84 {"inorder", FD_DBG_INORDER, "Disable reordering for draws/blits"}, 85 {"bstat", FD_DBG_BSTAT, "Print batch stats at context destroy"}, 86 {"nogrow", FD_DBG_NOGROW, "Disable \"growable\" cmdstream buffers, even if kernel supports it"}, 87 {"lrz", FD_DBG_LRZ, "Enable experimental LRZ support (a5xx)"}, 88 {"noindirect",FD_DBG_NOINDR, "Disable hw indirect draws (emulate on CPU)"}, 89 {"noblit", FD_DBG_NOBLIT, "Disable blitter (fallback to generic blit path)"}, 90 {"hiprio", FD_DBG_HIPRIO, "Force high-priority context"}, 91 {"ttile", FD_DBG_TTILE, "Enable texture tiling (a2xx/a3xx/a5xx)"}, 92 {"perfcntrs", FD_DBG_PERFC, "Expose performance counters"}, 93 {"noubwc", FD_DBG_NOUBWC, "Disable UBWC for all internal buffers"}, 94 {"nolrz", FD_DBG_NOLRZ, "Disable LRZ (a6xx)"}, 95 {"notile", FD_DBG_NOTILE, "Disable tiling for all internal buffers"}, 96 {"layout", FD_DBG_LAYOUT, "Dump resource layouts"}, 97 {"nofp16", FD_DBG_NOFP16, "Disable mediump precision lowering"}, 98 {"nohw", FD_DBG_NOHW, "Disable submitting commands to the HW"}, 99 DEBUG_NAMED_VALUE_END 100}; 101/* clang-format on */ 102 103DEBUG_GET_ONCE_FLAGS_OPTION(fd_mesa_debug, "FD_MESA_DEBUG", fd_debug_options, 0) 104 105int fd_mesa_debug = 0; 106bool fd_binning_enabled = true; 107 108static const char * 109fd_screen_get_name(struct pipe_screen *pscreen) 110{ 111 return fd_dev_name(fd_screen(pscreen)->dev_id); 112} 113 114static const char * 115fd_screen_get_vendor(struct pipe_screen *pscreen) 116{ 117 return "freedreno"; 118} 119 120static const char * 121fd_screen_get_device_vendor(struct pipe_screen *pscreen) 122{ 123 return "Qualcomm"; 124} 125 126static uint64_t 127fd_screen_get_timestamp(struct pipe_screen *pscreen) 128{ 129 struct fd_screen *screen = fd_screen(pscreen); 130 131 if (screen->has_timestamp) { 132 uint64_t n; 133 fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &n); 134 debug_assert(screen->max_freq > 0); 135 return n * 1000000000 / screen->max_freq; 136 } else { 137 int64_t cpu_time = os_time_get() * 1000; 138 return cpu_time + screen->cpu_gpu_time_delta; 139 } 140} 141 142static void 143fd_screen_destroy(struct pipe_screen *pscreen) 144{ 145 struct fd_screen *screen = fd_screen(pscreen); 146 147 if (screen->pipe) 148 fd_pipe_del(screen->pipe); 149 150 if (screen->dev) { 151 fd_device_purge(screen->dev); 152 fd_device_del(screen->dev); 153 } 154 155 if (screen->ro) 156 screen->ro->destroy(screen->ro); 157 158 fd_bc_fini(&screen->batch_cache); 159 fd_gmem_screen_fini(pscreen); 160 161 slab_destroy_parent(&screen->transfer_pool); 162 163 simple_mtx_destroy(&screen->lock); 164 165 util_idalloc_mt_fini(&screen->buffer_ids); 166 167 u_transfer_helper_destroy(pscreen->transfer_helper); 168 169 if (screen->compiler) 170 ir3_screen_fini(pscreen); 171 172 free(screen->perfcntr_queries); 173 free(screen); 174} 175 176/* 177TODO either move caps to a2xx/a3xx specific code, or maybe have some 178tables for things that differ if the delta is not too much.. 179 */ 180static int 181fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) 182{ 183 struct fd_screen *screen = fd_screen(pscreen); 184 185 /* this is probably not totally correct.. but it's a start: */ 186 switch (param) { 187 /* Supported features (boolean caps). */ 188 case PIPE_CAP_NPOT_TEXTURES: 189 case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: 190 case PIPE_CAP_ANISOTROPIC_FILTER: 191 case PIPE_CAP_POINT_SPRITE: 192 case PIPE_CAP_BLEND_EQUATION_SEPARATE: 193 case PIPE_CAP_TEXTURE_SWIZZLE: 194 case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: 195 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: 196 case PIPE_CAP_SEAMLESS_CUBE_MAP: 197 case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: 198 case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: 199 case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: 200 case PIPE_CAP_STRING_MARKER: 201 case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: 202 case PIPE_CAP_TEXTURE_BARRIER: 203 case PIPE_CAP_INVALIDATE_BUFFER: 204 case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: 205 case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: 206 case PIPE_CAP_NIR_COMPACT_ARRAYS: 207 return 1; 208 209 case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: 210 return is_a6xx(screen); 211 212 case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: 213 case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: 214 case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: 215 return !is_a2xx(screen); 216 217 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: 218 return is_a2xx(screen); 219 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: 220 return !is_a2xx(screen); 221 222 case PIPE_CAP_PACKED_UNIFORMS: 223 return !is_a2xx(screen); 224 225 case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: 226 case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: 227 return screen->has_robustness; 228 229 case PIPE_CAP_VERTEXID_NOBASE: 230 return is_a3xx(screen) || is_a4xx(screen); 231 232 case PIPE_CAP_COMPUTE: 233 return has_compute(screen); 234 235 case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: 236 case PIPE_CAP_PCI_GROUP: 237 case PIPE_CAP_PCI_BUS: 238 case PIPE_CAP_PCI_DEVICE: 239 case PIPE_CAP_PCI_FUNCTION: 240 return 0; 241 242 case PIPE_CAP_SUPPORTED_PRIM_MODES: 243 case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART: 244 return screen->primtypes_mask; 245 246 case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD: 247 case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES: 248 case PIPE_CAP_VERTEX_SHADER_SATURATE: 249 case PIPE_CAP_PRIMITIVE_RESTART: 250 case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX: 251 case PIPE_CAP_TGSI_INSTANCEID: 252 case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: 253 case PIPE_CAP_INDEP_BLEND_ENABLE: 254 case PIPE_CAP_INDEP_BLEND_FUNC: 255 case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: 256 case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: 257 case PIPE_CAP_CONDITIONAL_RENDER: 258 case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: 259 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: 260 case PIPE_CAP_CLIP_HALFZ: 261 return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || 262 is_a6xx(screen); 263 264 case PIPE_CAP_FAKE_SW_MSAA: 265 return !fd_screen_get_param(pscreen, PIPE_CAP_TEXTURE_MULTISAMPLE); 266 267 case PIPE_CAP_TEXTURE_MULTISAMPLE: 268 return is_a5xx(screen) || is_a6xx(screen); 269 270 case PIPE_CAP_SURFACE_SAMPLE_COUNT: 271 return is_a6xx(screen); 272 273 case PIPE_CAP_DEPTH_CLIP_DISABLE: 274 return is_a3xx(screen) || is_a4xx(screen) || is_a6xx(screen); 275 276 case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: 277 return is_a6xx(screen); 278 279 case PIPE_CAP_POLYGON_OFFSET_CLAMP: 280 return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen); 281 282 case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF: 283 return 0; 284 285 case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: 286 if (is_a3xx(screen)) 287 return 16; 288 if (is_a4xx(screen)) 289 return 32; 290 if (is_a5xx(screen) || is_a6xx(screen)) 291 return 64; 292 return 0; 293 case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: 294 /* We could possibly emulate more by pretending 2d/rect textures and 295 * splitting high bits of index into 2nd dimension.. 296 */ 297 if (is_a3xx(screen)) 298 return 8192; 299 if (is_a4xx(screen)) 300 return 16384; 301 302 /* Note that the Vulkan blob on a540 and 640 report a 303 * maxTexelBufferElements of just 65536 (the GLES3.2 and Vulkan 304 * minimum). 305 */ 306 if (is_a5xx(screen) || is_a6xx(screen)) 307 return 1 << 27; 308 return 0; 309 310 case PIPE_CAP_TEXTURE_FLOAT_LINEAR: 311 case PIPE_CAP_CUBE_MAP_ARRAY: 312 case PIPE_CAP_SAMPLER_VIEW_TARGET: 313 case PIPE_CAP_TEXTURE_QUERY_LOD: 314 return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen); 315 316 case PIPE_CAP_START_INSTANCE: 317 /* Note that a5xx can do this, it just can't (at least with 318 * current firmware) do draw_indirect with base_instance. 319 * Since draw_indirect is needed sooner (gles31 and gl40 vs 320 * gl42), hide base_instance on a5xx. :-/ 321 */ 322 return is_a4xx(screen); 323 324 case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: 325 return 64; 326 327 case PIPE_CAP_GLSL_FEATURE_LEVEL: 328 case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: 329 if (is_a6xx(screen)) 330 return 330; 331 else if (is_ir3(screen)) 332 return 140; 333 else 334 return 120; 335 336 case PIPE_CAP_ESSL_FEATURE_LEVEL: 337 /* we can probably enable 320 for a5xx too, but need to test: */ 338 if (is_a6xx(screen)) 339 return 320; 340 if (is_a5xx(screen)) 341 return 310; 342 if (is_ir3(screen)) 343 return 300; 344 return 120; 345 346 case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: 347 if (is_a6xx(screen)) 348 return 64; 349 if (is_a5xx(screen)) 350 return 4; 351 return 0; 352 353 case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: 354 if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) 355 return 4; 356 return 0; 357 358 /* TODO if we need this, do it in nir/ir3 backend to avoid breaking 359 * precompile: */ 360 case PIPE_CAP_FORCE_PERSAMPLE_INTERP: 361 return 0; 362 363 case PIPE_CAP_FBFETCH: 364 if (fd_device_version(screen->dev) >= FD_VERSION_GMEM_BASE && 365 is_a6xx(screen)) 366 return 1; 367 return 0; 368 case PIPE_CAP_SAMPLE_SHADING: 369 if (is_a6xx(screen)) 370 return 1; 371 return 0; 372 373 case PIPE_CAP_CONTEXT_PRIORITY_MASK: 374 return screen->priority_mask; 375 376 case PIPE_CAP_DRAW_INDIRECT: 377 if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) 378 return 1; 379 return 0; 380 381 case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: 382 if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)) 383 return 1; 384 return 0; 385 386 case PIPE_CAP_LOAD_CONSTBUF: 387 /* name is confusing, but this turns on std430 packing */ 388 if (is_ir3(screen)) 389 return 1; 390 return 0; 391 392 case PIPE_CAP_NIR_IMAGES_AS_DEREF: 393 return 0; 394 395 case PIPE_CAP_MAX_VIEWPORTS: 396 return 1; 397 398 case PIPE_CAP_MAX_VARYINGS: 399 return is_a6xx(screen) ? 31 : 16; 400 401 case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: 402 /* We don't really have a limit on this, it all goes into the main 403 * memory buffer. Needs to be at least 120 / 4 (minimum requirement 404 * for GL_MAX_TESS_PATCH_COMPONENTS). 405 */ 406 return 128; 407 408 case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: 409 return 64 * 1024 * 1024; 410 411 case PIPE_CAP_SHAREABLE_SHADERS: 412 case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: 413 if (is_ir3(screen)) 414 return 1; 415 return 0; 416 417 /* Geometry shaders.. */ 418 case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: 419 return 512; 420 case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: 421 return 2048; 422 case PIPE_CAP_MAX_GS_INVOCATIONS: 423 return 32; 424 425 /* Only a2xx has the half-border clamp mode in HW, just have mesa/st lower 426 * it for later HW. 427 */ 428 case PIPE_CAP_GL_CLAMP: 429 return is_a2xx(screen); 430 431 case PIPE_CAP_CLIP_PLANES: 432 /* On a3xx, there is HW support for GL user clip planes that 433 * occasionally has to fall back to shader key-based lowering to clip 434 * distances in the VS, and we don't support clip distances so that is 435 * always shader-based lowering in the FS. 436 * 437 * On a4xx, there is no HW support for clip planes, so they are 438 * always lowered to clip distances. We also lack SW support for the 439 * HW's clip distances in HW, so we do shader-based lowering in the FS 440 * in the driver backend. 441 * 442 * On a5xx-a6xx, we have the HW clip distances hooked up, so we just let 443 * mesa/st lower desktop GL's clip planes to clip distances in the last 444 * vertex shader stage. 445 */ 446 return !is_a5xx(screen) && !is_a6xx(screen); 447 448 /* Stream output. */ 449 case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: 450 if (is_ir3(screen)) 451 return PIPE_MAX_SO_BUFFERS; 452 return 0; 453 case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: 454 case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: 455 case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: 456 case PIPE_CAP_TGSI_TEXCOORD: 457 if (is_ir3(screen)) 458 return 1; 459 return 0; 460 case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: 461 return 1; 462 case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL: 463 return is_a2xx(screen); 464 case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: 465 case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: 466 if (is_ir3(screen)) 467 return 16 * 4; /* should only be shader out limit? */ 468 return 0; 469 470 /* Texturing. */ 471 case PIPE_CAP_MAX_TEXTURE_2D_SIZE: 472 if (is_a6xx(screen) || is_a5xx(screen) || is_a4xx(screen)) 473 return 16384; 474 else 475 return 8192; 476 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: 477 if (is_a6xx(screen) || is_a5xx(screen) || is_a4xx(screen)) 478 return 15; 479 else 480 return 14; 481 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: 482 return 11; 483 484 case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: 485 return (is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || 486 is_a6xx(screen)) 487 ? 256 488 : 0; 489 490 /* Render targets. */ 491 case PIPE_CAP_MAX_RENDER_TARGETS: 492 return screen->max_rts; 493 case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: 494 return (is_a3xx(screen) || is_a6xx(screen)) ? 1 : 0; 495 496 /* Queries. */ 497 case PIPE_CAP_OCCLUSION_QUERY: 498 return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || 499 is_a6xx(screen); 500 case PIPE_CAP_QUERY_TIMESTAMP: 501 case PIPE_CAP_QUERY_TIME_ELAPSED: 502 /* only a4xx, requires new enough kernel so we know max_freq: */ 503 return (screen->max_freq > 0) && 504 (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen)); 505 506 case PIPE_CAP_VENDOR_ID: 507 return 0x5143; 508 case PIPE_CAP_DEVICE_ID: 509 return 0xFFFFFFFF; 510 case PIPE_CAP_ACCELERATED: 511 return 1; 512 case PIPE_CAP_VIDEO_MEMORY: 513 DBG("FINISHME: The value returned is incorrect\n"); 514 return 10; 515 case PIPE_CAP_UMA: 516 return 1; 517 case PIPE_CAP_MEMOBJ: 518 return fd_device_version(screen->dev) >= FD_VERSION_MEMORY_FD; 519 case PIPE_CAP_NATIVE_FENCE_FD: 520 return fd_device_version(screen->dev) >= FD_VERSION_FENCE_FD; 521 case PIPE_CAP_FENCE_SIGNAL: 522 return screen->has_syncobj; 523 case PIPE_CAP_CULL_DISTANCE: 524 return is_a6xx(screen); 525 case PIPE_CAP_SHADER_STENCIL_EXPORT: 526 return is_a6xx(screen); 527 case PIPE_CAP_TWO_SIDED_COLOR: 528 return 0; 529 default: 530 return u_pipe_screen_get_param_defaults(pscreen, param); 531 } 532} 533 534static float 535fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) 536{ 537 switch (param) { 538 case PIPE_CAPF_MAX_LINE_WIDTH: 539 case PIPE_CAPF_MAX_LINE_WIDTH_AA: 540 /* NOTE: actual value is 127.0f, but this is working around a deqp 541 * bug.. dEQP-GLES3.functional.rasterization.primitives.lines_wide 542 * uses too small of a render target size, and gets confused when 543 * the lines start going offscreen. 544 * 545 * See: https://code.google.com/p/android/issues/detail?id=206513 546 */ 547 if (FD_DBG(DEQP)) 548 return 48.0f; 549 return 127.0f; 550 case PIPE_CAPF_MAX_POINT_WIDTH: 551 case PIPE_CAPF_MAX_POINT_WIDTH_AA: 552 return 4092.0f; 553 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: 554 return 16.0f; 555 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: 556 return 15.0f; 557 case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: 558 case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: 559 case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: 560 return 0.0f; 561 } 562 mesa_loge("unknown paramf %d", param); 563 return 0; 564} 565 566static int 567fd_screen_get_shader_param(struct pipe_screen *pscreen, 568 enum pipe_shader_type shader, 569 enum pipe_shader_cap param) 570{ 571 struct fd_screen *screen = fd_screen(pscreen); 572 573 switch (shader) { 574 case PIPE_SHADER_FRAGMENT: 575 case PIPE_SHADER_VERTEX: 576 break; 577 case PIPE_SHADER_TESS_CTRL: 578 case PIPE_SHADER_TESS_EVAL: 579 case PIPE_SHADER_GEOMETRY: 580 if (is_a6xx(screen)) 581 break; 582 return 0; 583 case PIPE_SHADER_COMPUTE: 584 if (has_compute(screen)) 585 break; 586 return 0; 587 default: 588 mesa_loge("unknown shader type %d", shader); 589 return 0; 590 } 591 592 /* this is probably not totally correct.. but it's a start: */ 593 switch (param) { 594 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: 595 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: 596 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: 597 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: 598 return 16384; 599 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: 600 return 8; /* XXX */ 601 case PIPE_SHADER_CAP_MAX_INPUTS: 602 if (shader == PIPE_SHADER_GEOMETRY && is_a6xx(screen)) 603 return 16; 604 return is_a6xx(screen) ? 32 : 16; 605 case PIPE_SHADER_CAP_MAX_OUTPUTS: 606 return is_a6xx(screen) ? 32 : 16; 607 case PIPE_SHADER_CAP_MAX_TEMPS: 608 return 64; /* Max native temporaries. */ 609 case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: 610 /* NOTE: seems to be limit for a3xx is actually 512 but 611 * split between VS and FS. Use lower limit of 256 to 612 * avoid getting into impossible situations: 613 */ 614 return ((is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) || 615 is_a6xx(screen)) 616 ? 4096 617 : 64) * 618 sizeof(float[4]); 619 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: 620 return is_ir3(screen) ? 16 : 1; 621 case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: 622 return 1; 623 case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: 624 case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: 625 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: 626 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: 627 /* a2xx compiler doesn't handle indirect: */ 628 return is_ir3(screen) ? 1 : 0; 629 case PIPE_SHADER_CAP_SUBROUTINES: 630 case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: 631 case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: 632 case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: 633 case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: 634 case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: 635 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: 636 case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: 637 case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: 638 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: 639 return 0; 640 case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: 641 return 1; 642 case PIPE_SHADER_CAP_INTEGERS: 643 return is_ir3(screen) ? 1 : 0; 644 case PIPE_SHADER_CAP_INT64_ATOMICS: 645 case PIPE_SHADER_CAP_FP16_DERIVATIVES: 646 case PIPE_SHADER_CAP_FP16_CONST_BUFFERS: 647 case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS: 648 return 0; 649 case PIPE_SHADER_CAP_INT16: 650 case PIPE_SHADER_CAP_FP16: 651 return ( 652 (is_a5xx(screen) || is_a6xx(screen)) && 653 (shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT) && 654 !FD_DBG(NOFP16)); 655 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: 656 case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: 657 return 16; 658 case PIPE_SHADER_CAP_PREFERRED_IR: 659 return PIPE_SHADER_IR_NIR; 660 case PIPE_SHADER_CAP_SUPPORTED_IRS: 661 return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI); 662 case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: 663 return 32; 664 case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: 665 case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: 666 if (is_a5xx(screen) || is_a6xx(screen)) { 667 /* a5xx (and a4xx for that matter) has one state-block 668 * for compute-shader SSBO's and another that is shared 669 * by VS/HS/DS/GS/FS.. so to simplify things for now 670 * just advertise SSBOs for FS and CS. We could possibly 671 * do what blob does, and partition the space for 672 * VS/HS/DS/GS/FS. The blob advertises: 673 * 674 * GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: 4 675 * GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS: 4 676 * GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS: 4 677 * GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS: 4 678 * GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS: 4 679 * GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS: 24 680 * GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS: 24 681 * 682 * I think that way we could avoid having to patch shaders 683 * for actual SSBO indexes by using a static partitioning. 684 * 685 * Note same state block is used for images and buffers, 686 * but images also need texture state for read access 687 * (isam/isam.3d) 688 */ 689 switch (shader) { 690 case PIPE_SHADER_FRAGMENT: 691 case PIPE_SHADER_COMPUTE: 692 return 24; 693 default: 694 return 0; 695 } 696 } 697 return 0; 698 } 699 mesa_loge("unknown shader param %d", param); 700 return 0; 701} 702 703/* TODO depending on how much the limits differ for a3xx/a4xx, maybe move this 704 * into per-generation backend? 705 */ 706static int 707fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type, 708 enum pipe_compute_cap param, void *ret) 709{ 710 struct fd_screen *screen = fd_screen(pscreen); 711 const char *const ir = "ir3"; 712 713 if (!has_compute(screen)) 714 return 0; 715 716#define RET(x) \ 717 do { \ 718 if (ret) \ 719 memcpy(ret, x, sizeof(x)); \ 720 return sizeof(x); \ 721 } while (0) 722 723 switch (param) { 724 case PIPE_COMPUTE_CAP_ADDRESS_BITS: 725 // don't expose 64b pointer support yet, until ir3 supports 64b 726 // math, otherwise spir64 target is used and we get 64b pointer 727 // calculations that we can't do yet 728 // if (is_a5xx(screen)) 729 // RET((uint32_t []){ 64 }); 730 RET((uint32_t[]){32}); 731 732 case PIPE_COMPUTE_CAP_IR_TARGET: 733 if (ret) 734 sprintf(ret, "%s", ir); 735 return strlen(ir) * sizeof(char); 736 737 case PIPE_COMPUTE_CAP_GRID_DIMENSION: 738 RET((uint64_t[]){3}); 739 740 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: 741 RET(((uint64_t[]){65535, 65535, 65535})); 742 743 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: 744 RET(((uint64_t[]){1024, 1024, 64})); 745 746 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: 747 RET((uint64_t[]){1024}); 748 749 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: 750 RET((uint64_t[]){screen->ram_size}); 751 752 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: 753 RET((uint64_t[]){32768}); 754 755 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: 756 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: 757 RET((uint64_t[]){4096}); 758 759 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: 760 RET((uint64_t[]){screen->ram_size}); 761 762 case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: 763 RET((uint32_t[]){screen->max_freq / 1000000}); 764 765 case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: 766 RET((uint32_t[]){9999}); // TODO 767 768 case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: 769 RET((uint32_t[]){1}); 770 771 case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: 772 RET((uint32_t[]){32}); // TODO 773 774 case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: 775 RET((uint64_t[]){1024}); // TODO 776 } 777 778 return 0; 779} 780 781static const void * 782fd_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir, 783 unsigned shader) 784{ 785 struct fd_screen *screen = fd_screen(pscreen); 786 787 if (is_ir3(screen)) 788 return ir3_get_compiler_options(screen->compiler); 789 790 return ir2_get_compiler_options(); 791} 792 793static struct disk_cache * 794fd_get_disk_shader_cache(struct pipe_screen *pscreen) 795{ 796 struct fd_screen *screen = fd_screen(pscreen); 797 798 if (is_ir3(screen)) { 799 struct ir3_compiler *compiler = screen->compiler; 800 return compiler->disk_cache; 801 } 802 803 return NULL; 804} 805 806bool 807fd_screen_bo_get_handle(struct pipe_screen *pscreen, struct fd_bo *bo, 808 struct renderonly_scanout *scanout, unsigned stride, 809 struct winsys_handle *whandle) 810{ 811 struct fd_screen *screen = fd_screen(pscreen); 812 813 whandle->stride = stride; 814 815 if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) { 816 return fd_bo_get_name(bo, &whandle->handle) == 0; 817 } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) { 818 if (screen->ro) { 819 return renderonly_get_handle(scanout, whandle); 820 } else { 821 whandle->handle = fd_bo_handle(bo); 822 return true; 823 } 824 } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) { 825 whandle->handle = fd_bo_dmabuf(bo); 826 return true; 827 } else { 828 return false; 829 } 830} 831 832static void 833fd_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen, 834 enum pipe_format format, int max, 835 uint64_t *modifiers, 836 unsigned int *external_only, int *count) 837{ 838 struct fd_screen *screen = fd_screen(pscreen); 839 int i, num = 0; 840 841 max = MIN2(max, screen->num_supported_modifiers); 842 843 if (!max) { 844 max = screen->num_supported_modifiers; 845 external_only = NULL; 846 modifiers = NULL; 847 } 848 849 for (i = 0; i < max; i++) { 850 if (modifiers) 851 modifiers[num] = screen->supported_modifiers[i]; 852 853 if (external_only) 854 external_only[num] = 0; 855 856 num++; 857 } 858 859 *count = num; 860} 861 862static bool 863fd_screen_is_dmabuf_modifier_supported(struct pipe_screen *pscreen, 864 uint64_t modifier, 865 enum pipe_format format, 866 bool *external_only) 867{ 868 struct fd_screen *screen = fd_screen(pscreen); 869 int i; 870 871 for (i = 0; i < screen->num_supported_modifiers; i++) { 872 if (modifier == screen->supported_modifiers[i]) { 873 if (external_only) 874 *external_only = false; 875 876 return true; 877 } 878 } 879 880 return false; 881} 882 883struct fd_bo * 884fd_screen_bo_from_handle(struct pipe_screen *pscreen, 885 struct winsys_handle *whandle) 886{ 887 struct fd_screen *screen = fd_screen(pscreen); 888 struct fd_bo *bo; 889 890 if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) { 891 bo = fd_bo_from_name(screen->dev, whandle->handle); 892 } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) { 893 bo = fd_bo_from_handle(screen->dev, whandle->handle, 0); 894 } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) { 895 bo = fd_bo_from_dmabuf(screen->dev, whandle->handle); 896 } else { 897 DBG("Attempt to import unsupported handle type %d", whandle->type); 898 return NULL; 899 } 900 901 if (!bo) { 902 DBG("ref name 0x%08x failed", whandle->handle); 903 return NULL; 904 } 905 906 return bo; 907} 908 909static void 910_fd_fence_ref(struct pipe_screen *pscreen, struct pipe_fence_handle **ptr, 911 struct pipe_fence_handle *pfence) 912{ 913 fd_fence_ref(ptr, pfence); 914} 915 916static void 917fd_screen_get_device_uuid(struct pipe_screen *pscreen, char *uuid) 918{ 919 struct fd_screen *screen = fd_screen(pscreen); 920 921 fd_get_device_uuid(uuid, screen->dev_id); 922} 923 924static void 925fd_screen_get_driver_uuid(struct pipe_screen *pscreen, char *uuid) 926{ 927 fd_get_driver_uuid(uuid); 928} 929 930struct pipe_screen * 931fd_screen_create(struct fd_device *dev, struct renderonly *ro, 932 const struct pipe_screen_config *config) 933{ 934 struct fd_screen *screen = CALLOC_STRUCT(fd_screen); 935 struct pipe_screen *pscreen; 936 uint64_t val; 937 938 fd_mesa_debug = debug_get_option_fd_mesa_debug(); 939 940 if (FD_DBG(NOBIN)) 941 fd_binning_enabled = false; 942 943 if (!screen) 944 return NULL; 945 946#ifdef HAVE_PERFETTO 947 fd_perfetto_init(); 948#endif 949 950 pscreen = &screen->base; 951 952 screen->dev = dev; 953 screen->ro = ro; 954 screen->refcnt = 1; 955 956 // maybe this should be in context? 957 screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D); 958 if (!screen->pipe) { 959 DBG("could not create 3d pipe"); 960 goto fail; 961 } 962 963 if (fd_pipe_get_param(screen->pipe, FD_GMEM_SIZE, &val)) { 964 DBG("could not get GMEM size"); 965 goto fail; 966 } 967 screen->gmemsize_bytes = env_var_as_unsigned("FD_MESA_GMEM", val); 968 969 if (fd_device_version(dev) >= FD_VERSION_GMEM_BASE) { 970 fd_pipe_get_param(screen->pipe, FD_GMEM_BASE, &screen->gmem_base); 971 } 972 973 if (fd_pipe_get_param(screen->pipe, FD_MAX_FREQ, &val)) { 974 DBG("could not get gpu freq"); 975 /* this limits what performance related queries are 976 * supported but is not fatal 977 */ 978 screen->max_freq = 0; 979 } else { 980 screen->max_freq = val; 981 if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0) 982 screen->has_timestamp = true; 983 } 984 985 screen->dev_id = fd_pipe_dev_id(screen->pipe); 986 987 if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) { 988 DBG("could not get gpu-id"); 989 goto fail; 990 } 991 screen->gpu_id = val; 992 993 if (fd_pipe_get_param(screen->pipe, FD_CHIP_ID, &val)) { 994 DBG("could not get chip-id"); 995 /* older kernels may not have this property: */ 996 unsigned core = screen->gpu_id / 100; 997 unsigned major = (screen->gpu_id % 100) / 10; 998 unsigned minor = screen->gpu_id % 10; 999 unsigned patch = 0; /* assume the worst */ 1000 val = (patch & 0xff) | ((minor & 0xff) << 8) | ((major & 0xff) << 16) | 1001 ((core & 0xff) << 24); 1002 } 1003 screen->chip_id = val; 1004 screen->gen = fd_dev_gen(screen->dev_id); 1005 1006 if (fd_pipe_get_param(screen->pipe, FD_NR_RINGS, &val)) { 1007 DBG("could not get # of rings"); 1008 screen->priority_mask = 0; 1009 } else { 1010 /* # of rings equates to number of unique priority values: */ 1011 screen->priority_mask = (1 << val) - 1; 1012 } 1013 1014 if (fd_device_version(dev) >= FD_VERSION_ROBUSTNESS) 1015 screen->has_robustness = true; 1016 1017 screen->has_syncobj = fd_has_syncobj(screen->dev); 1018 1019 /* parse driconf configuration now for device specific overrides: */ 1020 driParseConfigFiles(config->options, config->options_info, 0, "msm", 1021 NULL, fd_dev_name(screen->dev_id), NULL, 0, NULL, 0); 1022 1023 struct sysinfo si; 1024 sysinfo(&si); 1025 screen->ram_size = si.totalram; 1026 1027 DBG("Pipe Info:"); 1028 DBG(" GPU-id: %s", fd_dev_name(screen->dev_id)); 1029 DBG(" Chip-id: 0x%016"PRIx64, screen->chip_id); 1030 DBG(" GMEM size: 0x%08x", screen->gmemsize_bytes); 1031 1032 const struct fd_dev_info *info = fd_dev_info(screen->dev_id); 1033 if (!info) { 1034 mesa_loge("unsupported GPU: a%03d", screen->gpu_id); 1035 goto fail; 1036 } 1037 1038 screen->info = info; 1039 1040 /* explicitly checking for GPU revisions that are known to work. This 1041 * may be overly conservative for a3xx, where spoofing the gpu_id with 1042 * the blob driver seems to generate identical cmdstream dumps. But 1043 * on a2xx, there seem to be small differences between the GPU revs 1044 * so it is probably better to actually test first on real hardware 1045 * before enabling: 1046 * 1047 * If you have a different adreno version, feel free to add it to one 1048 * of the cases below and see what happens. And if it works, please 1049 * send a patch ;-) 1050 */ 1051 switch (screen->gen) { 1052 case 2: 1053 fd2_screen_init(pscreen); 1054 break; 1055 case 3: 1056 fd3_screen_init(pscreen); 1057 break; 1058 case 4: 1059 fd4_screen_init(pscreen); 1060 break; 1061 case 5: 1062 fd5_screen_init(pscreen); 1063 break; 1064 case 6: 1065 fd6_screen_init(pscreen); 1066 break; 1067 default: 1068 mesa_loge("unsupported GPU generation: a%uxx", screen->gen); 1069 goto fail; 1070 } 1071 1072 /* fdN_screen_init() should set this: */ 1073 assert(screen->primtypes); 1074 screen->primtypes_mask = 0; 1075 for (unsigned i = 0; i <= PIPE_PRIM_MAX; i++) 1076 if (screen->primtypes[i]) 1077 screen->primtypes_mask |= (1 << i); 1078 1079 if (FD_DBG(PERFC)) { 1080 screen->perfcntr_groups = 1081 fd_perfcntrs(screen->dev_id, &screen->num_perfcntr_groups); 1082 } 1083 1084 /* NOTE: don't enable if we have too old of a kernel to support 1085 * growable cmdstream buffers, since memory requirement for cmdstream 1086 * buffers would be too much otherwise. 1087 */ 1088 if (fd_device_version(dev) >= FD_VERSION_UNLIMITED_CMDS) 1089 screen->reorder = !FD_DBG(INORDER); 1090 1091 fd_bc_init(&screen->batch_cache); 1092 1093 list_inithead(&screen->context_list); 1094 1095 util_idalloc_mt_init_tc(&screen->buffer_ids); 1096 1097 (void)simple_mtx_init(&screen->lock, mtx_plain); 1098 1099 pscreen->destroy = fd_screen_destroy; 1100 pscreen->get_param = fd_screen_get_param; 1101 pscreen->get_paramf = fd_screen_get_paramf; 1102 pscreen->get_shader_param = fd_screen_get_shader_param; 1103 pscreen->get_compute_param = fd_get_compute_param; 1104 pscreen->get_compiler_options = fd_get_compiler_options; 1105 pscreen->get_disk_shader_cache = fd_get_disk_shader_cache; 1106 1107 fd_resource_screen_init(pscreen); 1108 fd_query_screen_init(pscreen); 1109 fd_gmem_screen_init(pscreen); 1110 1111 pscreen->get_name = fd_screen_get_name; 1112 pscreen->get_vendor = fd_screen_get_vendor; 1113 pscreen->get_device_vendor = fd_screen_get_device_vendor; 1114 1115 pscreen->get_timestamp = fd_screen_get_timestamp; 1116 1117 pscreen->fence_reference = _fd_fence_ref; 1118 pscreen->fence_finish = fd_fence_finish; 1119 pscreen->fence_get_fd = fd_fence_get_fd; 1120 1121 pscreen->query_dmabuf_modifiers = fd_screen_query_dmabuf_modifiers; 1122 pscreen->is_dmabuf_modifier_supported = 1123 fd_screen_is_dmabuf_modifier_supported; 1124 1125 pscreen->get_device_uuid = fd_screen_get_device_uuid; 1126 pscreen->get_driver_uuid = fd_screen_get_driver_uuid; 1127 1128 slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16); 1129 1130 return pscreen; 1131 1132fail: 1133 fd_screen_destroy(pscreen); 1134 return NULL; 1135} 1136