1/* 2 * Copyright 2010 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23#include <errno.h> 24#include <xf86drm.h> 25#include <nouveau_drm.h> 26#include "util/u_format.h" 27#include "util/u_format_s3tc.h" 28#include "util/u_screen.h" 29#include "pipe/p_screen.h" 30#include "compiler/nir/nir.h" 31 32#include "nv50/nv50_context.h" 33#include "nv50/nv50_screen.h" 34 35#include "nouveau_vp3_video.h" 36 37#include "nv_object.xml.h" 38 39/* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */ 40#define LOCAL_WARPS_ALLOC 32 41/* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */ 42#define STACK_WARPS_ALLOC 32 43 44#define THREADS_IN_WARP 32 45 46static boolean 47nv50_screen_is_format_supported(struct pipe_screen *pscreen, 48 enum pipe_format format, 49 enum pipe_texture_target target, 50 unsigned sample_count, 51 unsigned storage_sample_count, 52 unsigned bindings) 53{ 54 if (sample_count > 8) 55 return false; 56 if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */ 57 return false; 58 if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128) 59 return false; 60 61 if (MAX2(1, sample_count) != MAX2(1, storage_sample_count)) 62 return false; 63 64 switch (format) { 65 case PIPE_FORMAT_Z16_UNORM: 66 if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS) 67 return false; 68 break; 69 default: 70 break; 71 } 72 73 if (bindings & PIPE_BIND_LINEAR) 74 if (util_format_is_depth_or_stencil(format) || 75 (target != PIPE_TEXTURE_1D && 76 target != PIPE_TEXTURE_2D && 77 target != PIPE_TEXTURE_RECT) || 78 sample_count > 1) 79 return false; 80 81 /* shared is always supported */ 82 bindings &= ~(PIPE_BIND_LINEAR | 83 PIPE_BIND_SHARED); 84 85 return (( nv50_format_table[format].usage | 86 nv50_vertex_format[format].usage) & bindings) == bindings; 87} 88 89static int 90nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) 91{ 92 const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; 93 struct nouveau_device *dev = nouveau_screen(pscreen)->device; 94 95 switch (param) { 96 /* non-boolean caps */ 97 case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: 98 return 14; 99 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: 100 return 12; 101 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: 102 return 14; 103 case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: 104 return 512; 105 case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: 106 case PIPE_CAP_MIN_TEXEL_OFFSET: 107 return -8; 108 case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: 109 case PIPE_CAP_MAX_TEXEL_OFFSET: 110 return 7; 111 case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: 112 return 128 * 1024 * 1024; 113 case PIPE_CAP_GLSL_FEATURE_LEVEL: 114 return 330; 115 case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: 116 return 330; 117 case PIPE_CAP_MAX_RENDER_TARGETS: 118 return 8; 119 case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: 120 return 1; 121 case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: 122 case PIPE_CAP_RASTERIZER_SUBPIXEL_BITS: 123 return 8; 124 case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: 125 return 4; 126 case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: 127 case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: 128 return 64; 129 case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: 130 case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: 131 return 1024; 132 case PIPE_CAP_MAX_VERTEX_STREAMS: 133 return 1; 134 case PIPE_CAP_MAX_GS_INVOCATIONS: 135 return 0; 136 case PIPE_CAP_MAX_SHADER_BUFFER_SIZE: 137 return 0; 138 case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: 139 return 2048; 140 case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET: 141 return 2047; 142 case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: 143 return 256; 144 case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: 145 return 16; /* 256 for binding as RT, but that's not possible in GL */ 146 case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: 147 return NOUVEAU_MIN_BUFFER_MAP_ALIGN; 148 case PIPE_CAP_MAX_VIEWPORTS: 149 return NV50_MAX_VIEWPORTS; 150 case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: 151 return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50; 152 case PIPE_CAP_ENDIANNESS: 153 return PIPE_ENDIAN_LITTLE; 154 case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: 155 return (class_3d >= NVA3_3D_CLASS) ? 4 : 0; 156 case PIPE_CAP_MAX_WINDOW_RECTANGLES: 157 return NV50_MAX_WINDOW_RECTANGLES; 158 case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: 159 return 16 * 1024 * 1024; 160 case PIPE_CAP_MAX_VARYINGS: 161 return 15; 162 163 /* supported caps */ 164 case PIPE_CAP_TEXTURE_MIRROR_CLAMP: 165 case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE: 166 case PIPE_CAP_TEXTURE_SWIZZLE: 167 case PIPE_CAP_NPOT_TEXTURES: 168 case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: 169 case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: 170 case PIPE_CAP_ANISOTROPIC_FILTER: 171 case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: 172 case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: 173 case PIPE_CAP_DEPTH_CLIP_DISABLE: 174 case PIPE_CAP_POINT_SPRITE: 175 case PIPE_CAP_SM3: 176 case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: 177 case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: 178 case PIPE_CAP_VERTEX_COLOR_CLAMPED: 179 case PIPE_CAP_QUERY_TIMESTAMP: 180 case PIPE_CAP_QUERY_TIME_ELAPSED: 181 case PIPE_CAP_OCCLUSION_QUERY: 182 case PIPE_CAP_BLEND_EQUATION_SEPARATE: 183 case PIPE_CAP_INDEP_BLEND_ENABLE: 184 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: 185 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: 186 case PIPE_CAP_PRIMITIVE_RESTART: 187 case PIPE_CAP_TGSI_INSTANCEID: 188 case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: 189 case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: 190 case PIPE_CAP_CONDITIONAL_RENDER: 191 case PIPE_CAP_TEXTURE_BARRIER: 192 case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: 193 case PIPE_CAP_START_INSTANCE: 194 case PIPE_CAP_USER_VERTEX_BUFFERS: 195 case PIPE_CAP_TEXTURE_MULTISAMPLE: 196 case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: 197 case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: 198 case PIPE_CAP_SAMPLER_VIEW_TARGET: 199 case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: 200 case PIPE_CAP_CLIP_HALFZ: 201 case PIPE_CAP_POLYGON_OFFSET_CLAMP: 202 case PIPE_CAP_QUERY_PIPELINE_STATISTICS: 203 case PIPE_CAP_TEXTURE_FLOAT_LINEAR: 204 case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: 205 case PIPE_CAP_DEPTH_BOUNDS_TEST: 206 case PIPE_CAP_TGSI_TXQS: 207 case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: 208 case PIPE_CAP_SHAREABLE_SHADERS: 209 case PIPE_CAP_CLEAR_TEXTURE: 210 case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: 211 case PIPE_CAP_INVALIDATE_BUFFER: 212 case PIPE_CAP_STRING_MARKER: 213 case PIPE_CAP_CULL_DISTANCE: 214 case PIPE_CAP_TGSI_ARRAY_COMPONENTS: 215 case PIPE_CAP_TGSI_MUL_ZERO_WINS: 216 case PIPE_CAP_TGSI_TEX_TXF_LZ: 217 case PIPE_CAP_TGSI_CLOCK: 218 case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: 219 case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: 220 case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: 221 case PIPE_CAP_TGSI_DIV: 222 return 1; 223 case PIPE_CAP_SEAMLESS_CUBE_MAP: 224 return 1; /* class_3d >= NVA0_3D_CLASS; */ 225 /* supported on nva0+ */ 226 case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: 227 return class_3d >= NVA0_3D_CLASS; 228 /* supported on nva3+ */ 229 case PIPE_CAP_CUBE_MAP_ARRAY: 230 case PIPE_CAP_INDEP_BLEND_FUNC: 231 case PIPE_CAP_TEXTURE_QUERY_LOD: 232 case PIPE_CAP_SAMPLE_SHADING: 233 case PIPE_CAP_FORCE_PERSAMPLE_INTERP: 234 return class_3d >= NVA3_3D_CLASS; 235 236 /* unsupported caps */ 237 case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE: 238 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: 239 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: 240 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: 241 case PIPE_CAP_SHADER_STENCIL_EXPORT: 242 case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: 243 case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: 244 case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: 245 case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: 246 case PIPE_CAP_TGSI_TEXCOORD: 247 case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: 248 case PIPE_CAP_TEXTURE_GATHER_SM5: 249 case PIPE_CAP_FAKE_SW_MSAA: 250 case PIPE_CAP_TEXTURE_GATHER_OFFSETS: 251 case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: 252 case PIPE_CAP_DRAW_INDIRECT: 253 case PIPE_CAP_MULTI_DRAW_INDIRECT: 254 case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: 255 case PIPE_CAP_VERTEXID_NOBASE: 256 case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */ 257 case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: 258 case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: 259 case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: 260 case PIPE_CAP_DRAW_PARAMETERS: 261 case PIPE_CAP_TGSI_PACK_HALF_FLOAT: 262 case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: 263 case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: 264 case PIPE_CAP_GENERATE_MIPMAP: 265 case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: 266 case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: 267 case PIPE_CAP_QUERY_BUFFER_OBJECT: 268 case PIPE_CAP_QUERY_MEMORY_INFO: 269 case PIPE_CAP_PCI_GROUP: 270 case PIPE_CAP_PCI_BUS: 271 case PIPE_CAP_PCI_DEVICE: 272 case PIPE_CAP_PCI_FUNCTION: 273 case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: 274 case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: 275 case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: 276 case PIPE_CAP_TGSI_VOTE: 277 case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: 278 case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: 279 case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: 280 case PIPE_CAP_NATIVE_FENCE_FD: 281 case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: 282 case PIPE_CAP_TGSI_FS_FBFETCH: 283 case PIPE_CAP_DOUBLES: 284 case PIPE_CAP_INT64: 285 case PIPE_CAP_INT64_DIVMOD: 286 case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: 287 case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE: 288 case PIPE_CAP_TGSI_BALLOT: 289 case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: 290 case PIPE_CAP_POST_DEPTH_COVERAGE: 291 case PIPE_CAP_BINDLESS_TEXTURE: 292 case PIPE_CAP_NIR_SAMPLERS_AS_DEREF: 293 case PIPE_CAP_QUERY_SO_OVERFLOW: 294 case PIPE_CAP_MEMOBJ: 295 case PIPE_CAP_LOAD_CONSTBUF: 296 case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS: 297 case PIPE_CAP_TILE_RASTER_ORDER: 298 case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES: 299 case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS: 300 case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: 301 case PIPE_CAP_CONTEXT_PRIORITY_MASK: 302 case PIPE_CAP_FENCE_SIGNAL: 303 case PIPE_CAP_CONSTBUF0_FLAGS: 304 case PIPE_CAP_PACKED_UNIFORMS: 305 case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES: 306 case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES: 307 case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: 308 case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: 309 case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: 310 case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: 311 case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: 312 case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS: 313 case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS: 314 case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS: 315 case PIPE_CAP_SURFACE_SAMPLE_COUNT: 316 case PIPE_CAP_TGSI_ATOMFADD: 317 case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE: 318 case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND: 319 case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS: 320 case PIPE_CAP_NIR_COMPACT_ARRAYS: 321 case PIPE_CAP_COMPUTE: 322 case PIPE_CAP_IMAGE_LOAD_FORMATTED: 323 return 0; 324 325 case PIPE_CAP_VENDOR_ID: 326 return 0x10de; 327 case PIPE_CAP_DEVICE_ID: { 328 uint64_t device_id; 329 if (nouveau_getparam(dev, NOUVEAU_GETPARAM_PCI_DEVICE, &device_id)) { 330 NOUVEAU_ERR("NOUVEAU_GETPARAM_PCI_DEVICE failed.\n"); 331 return -1; 332 } 333 return device_id; 334 } 335 case PIPE_CAP_ACCELERATED: 336 return 1; 337 case PIPE_CAP_VIDEO_MEMORY: 338 return dev->vram_size >> 20; 339 case PIPE_CAP_UMA: 340 return 0; 341 default: 342 debug_printf("%s: unhandled cap %d\n", __func__, param); 343 return u_pipe_screen_get_param_defaults(pscreen, param); 344 } 345} 346 347static int 348nv50_screen_get_shader_param(struct pipe_screen *pscreen, 349 enum pipe_shader_type shader, 350 enum pipe_shader_cap param) 351{ 352 const struct nouveau_screen *screen = nouveau_screen(pscreen); 353 354 switch (shader) { 355 case PIPE_SHADER_VERTEX: 356 case PIPE_SHADER_GEOMETRY: 357 case PIPE_SHADER_FRAGMENT: 358 break; 359 case PIPE_SHADER_COMPUTE: 360 default: 361 return 0; 362 } 363 364 switch (param) { 365 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: 366 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: 367 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: 368 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: 369 return 16384; 370 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: 371 return 4; 372 case PIPE_SHADER_CAP_MAX_INPUTS: 373 if (shader == PIPE_SHADER_VERTEX) 374 return 32; 375 return 15; 376 case PIPE_SHADER_CAP_MAX_OUTPUTS: 377 return 16; 378 case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: 379 return 65536; 380 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: 381 return NV50_MAX_PIPE_CONSTBUFS; 382 case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: 383 return shader != PIPE_SHADER_FRAGMENT; 384 case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: 385 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: 386 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: 387 return 1; 388 case PIPE_SHADER_CAP_MAX_TEMPS: 389 return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE; 390 case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: 391 return 1; 392 case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: 393 return 1; 394 case PIPE_SHADER_CAP_INT64_ATOMICS: 395 case PIPE_SHADER_CAP_FP16: 396 case PIPE_SHADER_CAP_SUBROUTINES: 397 return 0; /* please inline, or provide function declarations */ 398 case PIPE_SHADER_CAP_INTEGERS: 399 return 1; 400 case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS: 401 return 1; 402 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: 403 /* The chip could handle more sampler views than samplers */ 404 case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: 405 return MIN2(16, PIPE_MAX_SAMPLERS); 406 case PIPE_SHADER_CAP_PREFERRED_IR: 407 return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; 408 case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: 409 return 32; 410 case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: 411 case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: 412 case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED: 413 case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: 414 case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: 415 case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: 416 case PIPE_SHADER_CAP_SUPPORTED_IRS: 417 case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: 418 case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: 419 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS: 420 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS: 421 return 0; 422 case PIPE_SHADER_CAP_SCALAR_ISA: 423 return 1; 424 default: 425 NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); 426 return 0; 427 } 428} 429 430static float 431nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) 432{ 433 switch (param) { 434 case PIPE_CAPF_MAX_LINE_WIDTH: 435 case PIPE_CAPF_MAX_LINE_WIDTH_AA: 436 return 10.0f; 437 case PIPE_CAPF_MAX_POINT_WIDTH: 438 case PIPE_CAPF_MAX_POINT_WIDTH_AA: 439 return 64.0f; 440 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: 441 return 16.0f; 442 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: 443 return 4.0f; 444 case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: 445 case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: 446 case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: 447 return 0.0f; 448 } 449 450 NOUVEAU_ERR("unknown PIPE_CAPF %d\n", param); 451 return 0.0f; 452} 453 454static int 455nv50_screen_get_compute_param(struct pipe_screen *pscreen, 456 enum pipe_shader_ir ir_type, 457 enum pipe_compute_cap param, void *data) 458{ 459 struct nv50_screen *screen = nv50_screen(pscreen); 460 461#define RET(x) do { \ 462 if (data) \ 463 memcpy(data, x, sizeof(x)); \ 464 return sizeof(x); \ 465} while (0) 466 467 switch (param) { 468 case PIPE_COMPUTE_CAP_GRID_DIMENSION: 469 RET((uint64_t []) { 2 }); 470 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: 471 RET(((uint64_t []) { 65535, 65535 })); 472 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: 473 RET(((uint64_t []) { 512, 512, 64 })); 474 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: 475 RET((uint64_t []) { 512 }); 476 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */ 477 RET((uint64_t []) { 1ULL << 32 }); 478 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ 479 RET((uint64_t []) { 16 << 10 }); 480 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ 481 RET((uint64_t []) { 16 << 10 }); 482 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ 483 RET((uint64_t []) { 4096 }); 484 case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: 485 RET((uint32_t []) { 32 }); 486 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: 487 RET((uint64_t []) { 1ULL << 40 }); 488 case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: 489 RET((uint32_t []) { 0 }); 490 case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: 491 RET((uint32_t []) { screen->mp_count }); 492 case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: 493 RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */ 494 case PIPE_COMPUTE_CAP_ADDRESS_BITS: 495 RET((uint32_t []) { 32 }); 496 case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: 497 RET((uint64_t []) { 0 }); 498 default: 499 return 0; 500 } 501 502#undef RET 503} 504 505static void 506nv50_screen_destroy(struct pipe_screen *pscreen) 507{ 508 struct nv50_screen *screen = nv50_screen(pscreen); 509 510 if (!nouveau_drm_screen_unref(&screen->base)) 511 return; 512 513 if (screen->base.fence.current) { 514 struct nouveau_fence *current = NULL; 515 516 /* nouveau_fence_wait will create a new current fence, so wait on the 517 * _current_ one, and remove both. 518 */ 519 nouveau_fence_ref(screen->base.fence.current, ¤t); 520 nouveau_fence_wait(current, NULL); 521 nouveau_fence_ref(NULL, ¤t); 522 nouveau_fence_ref(NULL, &screen->base.fence.current); 523 } 524 if (screen->base.pushbuf) 525 screen->base.pushbuf->user_priv = NULL; 526 527 if (screen->blitter) 528 nv50_blitter_destroy(screen); 529 if (screen->pm.prog) { 530 screen->pm.prog->code = NULL; /* hardcoded, don't FREE */ 531 nv50_program_destroy(NULL, screen->pm.prog); 532 FREE(screen->pm.prog); 533 } 534 535 nouveau_bo_ref(NULL, &screen->code); 536 nouveau_bo_ref(NULL, &screen->tls_bo); 537 nouveau_bo_ref(NULL, &screen->stack_bo); 538 nouveau_bo_ref(NULL, &screen->txc); 539 nouveau_bo_ref(NULL, &screen->uniforms); 540 nouveau_bo_ref(NULL, &screen->fence.bo); 541 542 nouveau_heap_destroy(&screen->vp_code_heap); 543 nouveau_heap_destroy(&screen->gp_code_heap); 544 nouveau_heap_destroy(&screen->fp_code_heap); 545 546 FREE(screen->tic.entries); 547 548 nouveau_object_del(&screen->tesla); 549 nouveau_object_del(&screen->eng2d); 550 nouveau_object_del(&screen->m2mf); 551 nouveau_object_del(&screen->compute); 552 nouveau_object_del(&screen->sync); 553 554 nouveau_screen_fini(&screen->base); 555 556 FREE(screen); 557} 558 559static void 560nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence) 561{ 562 struct nv50_screen *screen = nv50_screen(pscreen); 563 struct nouveau_pushbuf *push = screen->base.pushbuf; 564 565 /* we need to do it after possible flush in MARK_RING */ 566 *sequence = ++screen->base.fence.sequence; 567 568 assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5); 569 PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4)); 570 PUSH_DATAh(push, screen->fence.bo->offset); 571 PUSH_DATA (push, screen->fence.bo->offset); 572 PUSH_DATA (push, *sequence); 573 PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 | 574 NV50_3D_QUERY_GET_UNK4 | 575 NV50_3D_QUERY_GET_UNIT_CROP | 576 NV50_3D_QUERY_GET_TYPE_QUERY | 577 NV50_3D_QUERY_GET_QUERY_SELECT_ZERO | 578 NV50_3D_QUERY_GET_SHORT); 579} 580 581static u32 582nv50_screen_fence_update(struct pipe_screen *pscreen) 583{ 584 return nv50_screen(pscreen)->fence.map[0]; 585} 586 587static void 588nv50_screen_init_hwctx(struct nv50_screen *screen) 589{ 590 struct nouveau_pushbuf *push = screen->base.pushbuf; 591 struct nv04_fifo *fifo; 592 unsigned i; 593 594 fifo = (struct nv04_fifo *)screen->base.channel->data; 595 596 BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1); 597 PUSH_DATA (push, screen->m2mf->handle); 598 BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3); 599 PUSH_DATA (push, screen->sync->handle); 600 PUSH_DATA (push, fifo->vram); 601 PUSH_DATA (push, fifo->vram); 602 603 BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1); 604 PUSH_DATA (push, screen->eng2d->handle); 605 BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4); 606 PUSH_DATA (push, screen->sync->handle); 607 PUSH_DATA (push, fifo->vram); 608 PUSH_DATA (push, fifo->vram); 609 PUSH_DATA (push, fifo->vram); 610 BEGIN_NV04(push, NV50_2D(OPERATION), 1); 611 PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY); 612 BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1); 613 PUSH_DATA (push, 0); 614 BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1); 615 PUSH_DATA (push, 0); 616 BEGIN_NV04(push, SUBC_2D(0x0888), 1); 617 PUSH_DATA (push, 1); 618 BEGIN_NV04(push, NV50_2D(COND_MODE), 1); 619 PUSH_DATA (push, NV50_2D_COND_MODE_ALWAYS); 620 621 BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); 622 PUSH_DATA (push, screen->tesla->handle); 623 624 BEGIN_NV04(push, NV50_3D(COND_MODE), 1); 625 PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); 626 627 BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1); 628 PUSH_DATA (push, screen->sync->handle); 629 BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11); 630 for (i = 0; i < 11; ++i) 631 PUSH_DATA(push, fifo->vram); 632 BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN); 633 for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i) 634 PUSH_DATA(push, fifo->vram); 635 636 BEGIN_NV04(push, NV50_3D(REG_MODE), 1); 637 PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED); 638 BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1); 639 PUSH_DATA (push, 0xf); 640 641 if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) { 642 BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1); 643 PUSH_DATA (push, 0x18); 644 } 645 646 BEGIN_NV04(push, NV50_3D(ZETA_COMP_ENABLE), 1); 647 PUSH_DATA(push, screen->base.drm->version >= 0x01000101); 648 649 BEGIN_NV04(push, NV50_3D(RT_COMP_ENABLE(0)), 8); 650 for (i = 0; i < 8; ++i) 651 PUSH_DATA(push, screen->base.drm->version >= 0x01000101); 652 653 BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); 654 PUSH_DATA (push, 1); 655 656 BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1); 657 PUSH_DATA (push, 0); 658 BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1); 659 PUSH_DATA (push, 0); 660 BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1); 661 PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1); 662 BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1); 663 PUSH_DATA (push, 0); 664 BEGIN_NV04(push, NV50_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1); 665 PUSH_DATA (push, 1); 666 BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1); 667 PUSH_DATA (push, 1); 668 669 if (screen->tesla->oclass >= NVA0_3D_CLASS) { 670 BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1); 671 PUSH_DATA (push, 0); 672 } 673 674 BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1); 675 PUSH_DATA (push, 0); 676 BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2); 677 PUSH_DATA (push, 0); 678 PUSH_DATA (push, 0); 679 BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1); 680 PUSH_DATA (push, 0x3f); 681 682 BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2); 683 PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2)); 684 PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2)); 685 686 BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2); 687 PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2)); 688 PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2)); 689 690 BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2); 691 PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2)); 692 PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2)); 693 694 BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3); 695 PUSH_DATAh(push, screen->tls_bo->offset); 696 PUSH_DATA (push, screen->tls_bo->offset); 697 PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8)); 698 699 BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3); 700 PUSH_DATAh(push, screen->stack_bo->offset); 701 PUSH_DATA (push, screen->stack_bo->offset); 702 PUSH_DATA (push, 4); 703 704 BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); 705 PUSH_DATAh(push, screen->uniforms->offset + (0 << 16)); 706 PUSH_DATA (push, screen->uniforms->offset + (0 << 16)); 707 PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000); 708 709 BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); 710 PUSH_DATAh(push, screen->uniforms->offset + (1 << 16)); 711 PUSH_DATA (push, screen->uniforms->offset + (1 << 16)); 712 PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000); 713 714 BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); 715 PUSH_DATAh(push, screen->uniforms->offset + (2 << 16)); 716 PUSH_DATA (push, screen->uniforms->offset + (2 << 16)); 717 PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000); 718 719 BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); 720 PUSH_DATAh(push, screen->uniforms->offset + (3 << 16)); 721 PUSH_DATA (push, screen->uniforms->offset + (3 << 16)); 722 PUSH_DATA (push, (NV50_CB_AUX << 16) | (NV50_CB_AUX_SIZE & 0xffff)); 723 724 BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3); 725 PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01); 726 PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21); 727 PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31); 728 729 /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */ 730 BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); 731 PUSH_DATA (push, (NV50_CB_AUX_RUNOUT_OFFSET << (8 - 2)) | NV50_CB_AUX); 732 BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4); 733 PUSH_DATAf(push, 0.0f); 734 PUSH_DATAf(push, 0.0f); 735 PUSH_DATAf(push, 0.0f); 736 PUSH_DATAf(push, 0.0f); 737 BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); 738 PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET); 739 PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET); 740 741 nv50_upload_ms_info(push); 742 743 /* max TIC (bits 4:8) & TSC bindings, per program type */ 744 for (i = 0; i < 3; ++i) { 745 BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1); 746 PUSH_DATA (push, 0x54); 747 } 748 749 BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3); 750 PUSH_DATAh(push, screen->txc->offset); 751 PUSH_DATA (push, screen->txc->offset); 752 PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); 753 754 BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3); 755 PUSH_DATAh(push, screen->txc->offset + 65536); 756 PUSH_DATA (push, screen->txc->offset + 65536); 757 PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); 758 759 BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1); 760 PUSH_DATA (push, 0); 761 762 BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1); 763 PUSH_DATA (push, 0); 764 BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1); 765 PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY); 766 BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2); 767 for (i = 0; i < 8 * 2; ++i) 768 PUSH_DATA(push, 0); 769 BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1); 770 PUSH_DATA (push, 0); 771 772 BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1); 773 PUSH_DATA (push, 1); 774 for (i = 0; i < NV50_MAX_VIEWPORTS; i++) { 775 BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2); 776 PUSH_DATAf(push, 0.0f); 777 PUSH_DATAf(push, 1.0f); 778 BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(i)), 2); 779 PUSH_DATA (push, 8192 << 16); 780 PUSH_DATA (push, 8192 << 16); 781 } 782 783 BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1); 784#ifdef NV50_SCISSORS_CLIPPING 785 PUSH_DATA (push, 0x0000); 786#else 787 PUSH_DATA (push, 0x1080); 788#endif 789 790 BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1); 791 PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT); 792 793 /* We use scissors instead of exact view volume clipping, 794 * so they're always enabled. 795 */ 796 for (i = 0; i < NV50_MAX_VIEWPORTS; i++) { 797 BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(i)), 3); 798 PUSH_DATA (push, 1); 799 PUSH_DATA (push, 8192 << 16); 800 PUSH_DATA (push, 8192 << 16); 801 } 802 803 BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1); 804 PUSH_DATA (push, 1); 805 BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1); 806 PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL); 807 BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1); 808 PUSH_DATA (push, 0x11111111); 809 BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1); 810 PUSH_DATA (push, 1); 811 812 BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1); 813 PUSH_DATA (push, 0); 814 if (screen->base.class_3d >= NV84_3D_CLASS) { 815 BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1); 816 PUSH_DATA (push, 0); 817 } 818 819 BEGIN_NV04(push, NV50_3D(UNK0FDC), 1); 820 PUSH_DATA (push, 1); 821 BEGIN_NV04(push, NV50_3D(UNK19C0), 1); 822 PUSH_DATA (push, 1); 823 824 PUSH_KICK (push); 825} 826 827static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space, 828 uint64_t *tls_size) 829{ 830 struct nouveau_device *dev = screen->base.device; 831 int ret; 832 833 screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) * 834 ONE_TEMP_SIZE; 835 if (nouveau_mesa_debug) 836 debug_printf("allocating space for %u temps\n", 837 util_next_power_of_two(tls_space / ONE_TEMP_SIZE)); 838 *tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) * 839 screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP; 840 841 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 842 *tls_size, NULL, &screen->tls_bo); 843 if (ret) { 844 NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret); 845 return ret; 846 } 847 848 return 0; 849} 850 851int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space) 852{ 853 struct nouveau_pushbuf *push = screen->base.pushbuf; 854 int ret; 855 uint64_t tls_size; 856 857 if (tls_space < screen->cur_tls_space) 858 return 0; 859 if (tls_space > screen->max_tls_space) { 860 /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC / 861 * LOCAL_WARPS_NO_CLAMP) */ 862 NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n", 863 (unsigned)(tls_space / ONE_TEMP_SIZE), 864 (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE)); 865 return -ENOMEM; 866 } 867 868 nouveau_bo_ref(NULL, &screen->tls_bo); 869 ret = nv50_tls_alloc(screen, tls_space, &tls_size); 870 if (ret) 871 return ret; 872 873 BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3); 874 PUSH_DATAh(push, screen->tls_bo->offset); 875 PUSH_DATA (push, screen->tls_bo->offset); 876 PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8)); 877 878 return 1; 879} 880 881static const nir_shader_compiler_options nir_options = { 882 .fuse_ffma = false, /* nir doesn't track mad vs fma */ 883 .lower_flrp32 = true, 884 .lower_flrp64 = true, 885 .lower_fpow = false, 886 .lower_fmod64 = true, 887 .lower_uadd_carry = true, 888 .lower_usub_borrow = true, 889 .lower_ffract = true, 890 .lower_pack_half_2x16 = true, 891 .lower_pack_unorm_2x16 = true, 892 .lower_pack_snorm_2x16 = true, 893 .lower_pack_unorm_4x8 = true, 894 .lower_pack_snorm_4x8 = true, 895 .lower_unpack_half_2x16 = true, 896 .lower_unpack_unorm_2x16 = true, 897 .lower_unpack_snorm_2x16 = true, 898 .lower_unpack_unorm_4x8 = true, 899 .lower_unpack_snorm_4x8 = true, 900 .lower_extract_byte = true, 901 .lower_extract_word = true, 902 .lower_all_io_to_temps = false, 903 .native_integers = true, 904 .lower_cs_local_index_from_id = true, 905 .use_interpolated_input_intrinsics = true, 906 .max_unroll_iterations = 32, 907}; 908 909static const void * 910nv50_screen_get_compiler_options(struct pipe_screen *pscreen, 911 enum pipe_shader_ir ir, 912 enum pipe_shader_type shader) 913{ 914 if (ir == PIPE_SHADER_IR_NIR) 915 return &nir_options; 916 return NULL; 917} 918 919struct nouveau_screen * 920nv50_screen_create(struct nouveau_device *dev) 921{ 922 struct nv50_screen *screen; 923 struct pipe_screen *pscreen; 924 struct nouveau_object *chan; 925 uint64_t value; 926 uint32_t tesla_class; 927 unsigned stack_size; 928 int ret; 929 930 screen = CALLOC_STRUCT(nv50_screen); 931 if (!screen) 932 return NULL; 933 pscreen = &screen->base.base; 934 pscreen->destroy = nv50_screen_destroy; 935 936 ret = nouveau_screen_init(&screen->base, dev); 937 if (ret) { 938 NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret); 939 goto fail; 940 } 941 942 /* TODO: Prevent FIFO prefetch before transfer of index buffers and 943 * admit them to VRAM. 944 */ 945 screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER | 946 PIPE_BIND_VERTEX_BUFFER; 947 screen->base.sysmem_bindings |= 948 PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER; 949 950 screen->base.pushbuf->user_priv = screen; 951 screen->base.pushbuf->rsvd_kick = 5; 952 953 chan = screen->base.channel; 954 955 pscreen->context_create = nv50_create; 956 pscreen->is_format_supported = nv50_screen_is_format_supported; 957 pscreen->get_param = nv50_screen_get_param; 958 pscreen->get_shader_param = nv50_screen_get_shader_param; 959 pscreen->get_paramf = nv50_screen_get_paramf; 960 pscreen->get_compute_param = nv50_screen_get_compute_param; 961 pscreen->get_driver_query_info = nv50_screen_get_driver_query_info; 962 pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info; 963 964 /* nir stuff */ 965 pscreen->get_compiler_options = nv50_screen_get_compiler_options; 966 967 nv50_screen_init_resource_functions(pscreen); 968 969 if (screen->base.device->chipset < 0x84 || 970 debug_get_bool_option("NOUVEAU_PMPEG", false)) { 971 /* PMPEG */ 972 nouveau_screen_init_vdec(&screen->base); 973 } else if (screen->base.device->chipset < 0x98 || 974 screen->base.device->chipset == 0xa0) { 975 /* VP2 */ 976 screen->base.base.get_video_param = nv84_screen_get_video_param; 977 screen->base.base.is_video_format_supported = nv84_screen_video_supported; 978 } else { 979 /* VP3/4 */ 980 screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param; 981 screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported; 982 } 983 984 ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, 985 NULL, &screen->fence.bo); 986 if (ret) { 987 NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret); 988 goto fail; 989 } 990 991 nouveau_bo_map(screen->fence.bo, 0, NULL); 992 screen->fence.map = screen->fence.bo->map; 993 screen->base.fence.emit = nv50_screen_fence_emit; 994 screen->base.fence.update = nv50_screen_fence_update; 995 996 ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS, 997 &(struct nv04_notify){ .length = 32 }, 998 sizeof(struct nv04_notify), &screen->sync); 999 if (ret) { 1000 NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret); 1001 goto fail; 1002 } 1003 1004 ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS, 1005 NULL, 0, &screen->m2mf); 1006 if (ret) { 1007 NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret); 1008 goto fail; 1009 } 1010 1011 ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS, 1012 NULL, 0, &screen->eng2d); 1013 if (ret) { 1014 NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret); 1015 goto fail; 1016 } 1017 1018 switch (dev->chipset & 0xf0) { 1019 case 0x50: 1020 tesla_class = NV50_3D_CLASS; 1021 break; 1022 case 0x80: 1023 case 0x90: 1024 tesla_class = NV84_3D_CLASS; 1025 break; 1026 case 0xa0: 1027 switch (dev->chipset) { 1028 case 0xa0: 1029 case 0xaa: 1030 case 0xac: 1031 tesla_class = NVA0_3D_CLASS; 1032 break; 1033 case 0xaf: 1034 tesla_class = NVAF_3D_CLASS; 1035 break; 1036 default: 1037 tesla_class = NVA3_3D_CLASS; 1038 break; 1039 } 1040 break; 1041 default: 1042 NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset); 1043 goto fail; 1044 } 1045 screen->base.class_3d = tesla_class; 1046 1047 ret = nouveau_object_new(chan, 0xbeef5097, tesla_class, 1048 NULL, 0, &screen->tesla); 1049 if (ret) { 1050 NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret); 1051 goto fail; 1052 } 1053 1054 /* This over-allocates by a page. The GP, which would execute at the end of 1055 * the last page, would trigger faults. The going theory is that it 1056 * prefetches up to a certain amount. 1057 */ 1058 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 1059 (3 << NV50_CODE_BO_SIZE_LOG2) + 0x1000, 1060 NULL, &screen->code); 1061 if (ret) { 1062 NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret); 1063 goto fail; 1064 } 1065 1066 nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); 1067 nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); 1068 nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); 1069 1070 nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); 1071 1072 screen->TPs = util_bitcount(value & 0xffff); 1073 screen->MPsInTP = util_bitcount(value & 0x0f000000); 1074 1075 screen->mp_count = screen->TPs * screen->MPsInTP; 1076 1077 stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP * 1078 STACK_WARPS_ALLOC * 64 * 8; 1079 1080 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL, 1081 &screen->stack_bo); 1082 if (ret) { 1083 NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret); 1084 goto fail; 1085 } 1086 1087 uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) * 1088 screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP * 1089 ONE_TEMP_SIZE; 1090 screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE; 1091 screen->max_tls_space /= 2; /* half of vram */ 1092 1093 /* hw can address max 64 KiB */ 1094 screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10); 1095 1096 uint64_t tls_size; 1097 unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE; 1098 ret = nv50_tls_alloc(screen, tls_space, &tls_size); 1099 if (ret) 1100 goto fail; 1101 1102 if (nouveau_mesa_debug) 1103 debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n", 1104 screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10); 1105 1106 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL, 1107 &screen->uniforms); 1108 if (ret) { 1109 NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret); 1110 goto fail; 1111 } 1112 1113 ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL, 1114 &screen->txc); 1115 if (ret) { 1116 NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret); 1117 goto fail; 1118 } 1119 1120 screen->tic.entries = CALLOC(4096, sizeof(void *)); 1121 screen->tsc.entries = screen->tic.entries + 2048; 1122 1123 if (!nv50_blitter_create(screen)) 1124 goto fail; 1125 1126 nv50_screen_init_hwctx(screen); 1127 1128 ret = nv50_screen_compute_setup(screen, screen->base.pushbuf); 1129 if (ret) { 1130 NOUVEAU_ERR("Failed to init compute context: %d\n", ret); 1131 goto fail; 1132 } 1133 1134 nouveau_fence_new(&screen->base, &screen->base.fence.current); 1135 1136 return &screen->base; 1137 1138fail: 1139 screen->base.base.context_create = NULL; 1140 return &screen->base; 1141} 1142 1143int 1144nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry) 1145{ 1146 int i = screen->tic.next; 1147 1148 while (screen->tic.lock[i / 32] & (1 << (i % 32))) 1149 i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1); 1150 1151 screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1); 1152 1153 if (screen->tic.entries[i]) 1154 nv50_tic_entry(screen->tic.entries[i])->id = -1; 1155 1156 screen->tic.entries[i] = entry; 1157 return i; 1158} 1159 1160int 1161nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry) 1162{ 1163 int i = screen->tsc.next; 1164 1165 while (screen->tsc.lock[i / 32] & (1 << (i % 32))) 1166 i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1); 1167 1168 screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1); 1169 1170 if (screen->tsc.entries[i]) 1171 nv50_tsc_entry(screen->tsc.entries[i])->id = -1; 1172 1173 screen->tsc.entries[i] = entry; 1174 return i; 1175} 1176