1/* 2 Copyright 2003 VMware, Inc. 3 Copyright (C) Intel Corp. 2006. All Rights Reserved. 4 Intel funded Tungsten Graphics to 5 develop this 3D driver. 6 7 Permission is hereby granted, free of charge, to any person obtaining 8 a copy of this software and associated documentation files (the 9 "Software"), to deal in the Software without restriction, including 10 without limitation the rights to use, copy, modify, merge, publish, 11 distribute, sublicense, and/or sell copies of the Software, and to 12 permit persons to whom the Software is furnished to do so, subject to 13 the following conditions: 14 15 The above copyright notice and this permission notice (including the 16 next paragraph) shall be included in all copies or substantial 17 portions of the Software. 18 19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
26 27 **********************************************************************/ 28 /* 29 * Authors: 30 * Keith Whitwell <keithw@vmware.com> 31 */ 32 33 34#include "compiler/nir/nir.h" 35#include "main/api_exec.h" 36#include "main/context.h" 37#include "main/fbobject.h" 38#include "main/extensions.h" 39#include "main/glthread.h" 40#include "main/imports.h" 41#include "main/macros.h" 42#include "main/points.h" 43#include "main/version.h" 44#include "main/vtxfmt.h" 45#include "main/texobj.h" 46#include "main/framebuffer.h" 47#include "main/stencil.h" 48#include "main/state.h" 49 50#include "vbo/vbo.h" 51 52#include "drivers/common/driverfuncs.h" 53#include "drivers/common/meta.h" 54#include "utils.h" 55 56#include "brw_context.h" 57#include "brw_defines.h" 58#include "brw_blorp.h" 59#include "brw_draw.h" 60#include "brw_state.h" 61 62#include "intel_batchbuffer.h" 63#include "intel_buffer_objects.h" 64#include "intel_buffers.h" 65#include "intel_fbo.h" 66#include "intel_mipmap_tree.h" 67#include "intel_pixel.h" 68#include "intel_image.h" 69#include "intel_tex.h" 70#include "intel_tex_obj.h" 71 72#include "swrast_setup/swrast_setup.h" 73#include "tnl/tnl.h" 74#include "tnl/t_pipeline.h" 75#include "util/ralloc.h" 76#include "util/debug.h" 77#include "util/disk_cache.h" 78#include "isl/isl.h" 79 80#include "common/gen_defines.h" 81 82#include "compiler/spirv/nir_spirv.h" 83/*************************************** 84 * Mesa's Driver Functions 85 ***************************************/ 86 87const char *const brw_vendor_string = "Intel Open Source Technology Center"; 88 89static const char * 90get_bsw_model(const struct intel_screen *screen) 91{ 92 switch (screen->eu_total) { 93 case 16: 94 return "405"; 95 case 12: 96 return "400"; 97 default: 98 return " "; 99 } 100} 101 102const char * 103brw_get_renderer_string(const struct intel_screen *screen) 104{ 105 const char *chipset; 106 static char buffer[128]; 107 char *bsw = NULL; 108 109 switch (screen->deviceID) { 110#undef 
CHIPSET 111#define CHIPSET(id, symbol, str) case id: chipset = str; break; 112#include "pci_ids/i965_pci_ids.h" 113 default: 114 chipset = "Unknown Intel Chipset"; 115 break; 116 } 117 118 /* Braswell branding is funny, so we have to fix it up here */ 119 if (screen->deviceID == 0x22B1) { 120 bsw = strdup(chipset); 121 char *needle = strstr(bsw, "XXX"); 122 if (needle) { 123 memcpy(needle, get_bsw_model(screen), 3); 124 chipset = bsw; 125 } 126 } 127 128 (void) driGetRendererString(buffer, chipset, 0); 129 free(bsw); 130 return buffer; 131} 132 133static const GLubyte * 134intel_get_string(struct gl_context * ctx, GLenum name) 135{ 136 const struct brw_context *const brw = brw_context(ctx); 137 138 switch (name) { 139 case GL_VENDOR: 140 return (GLubyte *) brw_vendor_string; 141 142 case GL_RENDERER: 143 return 144 (GLubyte *) brw_get_renderer_string(brw->screen); 145 146 default: 147 return NULL; 148 } 149} 150 151static void 152brw_set_background_context(struct gl_context *ctx, 153 struct util_queue_monitoring *queue_info) 154{ 155 struct brw_context *brw = brw_context(ctx); 156 __DRIcontext *driContext = brw->driContext; 157 __DRIscreen *driScreen = driContext->driScreenPriv; 158 const __DRIbackgroundCallableExtension *backgroundCallable = 159 driScreen->dri2.backgroundCallable; 160 161 /* Note: Mesa will only call this function if we've called 162 * _mesa_enable_multithreading(). We only do that if the loader exposed 163 * the __DRI_BACKGROUND_CALLABLE extension. So we know that 164 * backgroundCallable is not NULL. 
165 */ 166 backgroundCallable->setBackgroundContext(driContext->loaderPrivate); 167} 168 169static void 170intel_viewport(struct gl_context *ctx) 171{ 172 struct brw_context *brw = brw_context(ctx); 173 __DRIcontext *driContext = brw->driContext; 174 175 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { 176 if (driContext->driDrawablePriv) 177 dri2InvalidateDrawable(driContext->driDrawablePriv); 178 if (driContext->driReadablePriv) 179 dri2InvalidateDrawable(driContext->driReadablePriv); 180 } 181} 182 183static void 184intel_update_framebuffer(struct gl_context *ctx, 185 struct gl_framebuffer *fb) 186{ 187 struct brw_context *brw = brw_context(ctx); 188 189 /* Quantize the derived default number of samples 190 */ 191 fb->DefaultGeometry._NumSamples = 192 intel_quantize_num_samples(brw->screen, 193 fb->DefaultGeometry.NumSamples); 194} 195 196static void 197intel_update_state(struct gl_context * ctx) 198{ 199 GLuint new_state = ctx->NewState; 200 struct brw_context *brw = brw_context(ctx); 201 202 if (ctx->swrast_context) 203 _swrast_InvalidateState(ctx, new_state); 204 205 brw->NewGLState |= new_state; 206 207 if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT)) 208 _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer); 209 210 if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) { 211 brw->stencil_enabled = _mesa_stencil_is_enabled(ctx); 212 brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx); 213 brw->stencil_write_enabled = 214 _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided); 215 } 216 217 if (new_state & _NEW_POLYGON) 218 brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx); 219 220 if (new_state & _NEW_BUFFERS) { 221 intel_update_framebuffer(ctx, ctx->DrawBuffer); 222 if (ctx->DrawBuffer != ctx->ReadBuffer) 223 intel_update_framebuffer(ctx, ctx->ReadBuffer); 224 } 225} 226 227#define flushFront(screen) ((screen)->image.loader ? 
(screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer) 228 229static void 230intel_flush_front(struct gl_context *ctx) 231{ 232 struct brw_context *brw = brw_context(ctx); 233 __DRIcontext *driContext = brw->driContext; 234 __DRIdrawable *driDrawable = driContext->driDrawablePriv; 235 __DRIscreen *const dri_screen = brw->screen->driScrnPriv; 236 237 if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { 238 if (flushFront(dri_screen) && driDrawable && 239 driDrawable->loaderPrivate) { 240 241 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT. 242 * 243 * This potentially resolves both front and back buffer. It 244 * is unnecessary to resolve the back, but harms nothing except 245 * performance. And no one cares about front-buffer render 246 * performance. 247 */ 248 intel_resolve_for_dri2_flush(brw, driDrawable); 249 intel_batchbuffer_flush(brw); 250 251 flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate); 252 253 /* We set the dirty bit in intel_prepare_render() if we're 254 * front buffer rendering once we get there. 255 */ 256 brw->front_buffer_dirty = false; 257 } 258 } 259} 260 261static void 262brw_display_shared_buffer(struct brw_context *brw) 263{ 264 __DRIcontext *dri_context = brw->driContext; 265 __DRIdrawable *dri_drawable = dri_context->driDrawablePriv; 266 __DRIscreen *dri_screen = brw->screen->driScrnPriv; 267 int fence_fd = -1; 268 269 if (!brw->is_shared_buffer_bound) 270 return; 271 272 if (!brw->is_shared_buffer_dirty) 273 return; 274 275 if (brw->screen->has_exec_fence) { 276 /* This function is always called during a flush operation, so there is 277 * no need to flush again here. But we want to provide a fence_fd to the 278 * loader, and a redundant flush is the easiest way to acquire one. 
279 */ 280 if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd)) 281 return; 282 } 283 284 dri_screen->mutableRenderBuffer.loader 285 ->displaySharedBuffer(dri_drawable, fence_fd, 286 dri_drawable->loaderPrivate); 287 brw->is_shared_buffer_dirty = false; 288} 289 290static void 291intel_glFlush(struct gl_context *ctx) 292{ 293 struct brw_context *brw = brw_context(ctx); 294 295 intel_batchbuffer_flush(brw); 296 intel_flush_front(ctx); 297 brw_display_shared_buffer(brw); 298 brw->need_flush_throttle = true; 299} 300 301static void 302intel_finish(struct gl_context * ctx) 303{ 304 struct brw_context *brw = brw_context(ctx); 305 306 intel_glFlush(ctx); 307 308 if (brw->batch.last_bo) 309 brw_bo_wait_rendering(brw->batch.last_bo); 310} 311 312static void 313brw_init_driver_functions(struct brw_context *brw, 314 struct dd_function_table *functions) 315{ 316 const struct gen_device_info *devinfo = &brw->screen->devinfo; 317 318 _mesa_init_driver_functions(functions); 319 320 /* GLX uses DRI2 invalidate events to handle window resizing. 321 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib), 322 * which doesn't provide a mechanism for snooping the event queues. 323 * 324 * So EGL still relies on viewport hacks to handle window resizing. 325 * This should go away with DRI3000. 
326 */ 327 if (!brw->driContext->driScreenPriv->dri2.useInvalidate) 328 functions->Viewport = intel_viewport; 329 330 functions->Flush = intel_glFlush; 331 functions->Finish = intel_finish; 332 functions->GetString = intel_get_string; 333 functions->UpdateState = intel_update_state; 334 335 brw_init_draw_functions(functions); 336 intelInitTextureFuncs(functions); 337 intelInitTextureImageFuncs(functions); 338 intelInitTextureCopyImageFuncs(functions); 339 intelInitCopyImageFuncs(functions); 340 intelInitClearFuncs(functions); 341 intelInitBufferFuncs(functions); 342 intelInitPixelFuncs(functions); 343 intelInitBufferObjectFuncs(functions); 344 brw_init_syncobj_functions(functions); 345 brw_init_object_purgeable_functions(functions); 346 347 brwInitFragProgFuncs( functions ); 348 brw_init_common_queryobj_functions(functions); 349 if (devinfo->gen >= 8 || devinfo->is_haswell) 350 hsw_init_queryobj_functions(functions); 351 else if (devinfo->gen >= 6) 352 gen6_init_queryobj_functions(functions); 353 else 354 gen4_init_queryobj_functions(functions); 355 brw_init_compute_functions(functions); 356 brw_init_conditional_render_functions(functions); 357 358 functions->GenerateMipmap = brw_generate_mipmap; 359 360 functions->QueryInternalFormat = brw_query_internal_format; 361 362 functions->NewTransformFeedback = brw_new_transform_feedback; 363 functions->DeleteTransformFeedback = brw_delete_transform_feedback; 364 if (can_do_mi_math_and_lrr(brw->screen)) { 365 functions->BeginTransformFeedback = hsw_begin_transform_feedback; 366 functions->EndTransformFeedback = hsw_end_transform_feedback; 367 functions->PauseTransformFeedback = hsw_pause_transform_feedback; 368 functions->ResumeTransformFeedback = hsw_resume_transform_feedback; 369 } else if (devinfo->gen >= 7) { 370 functions->BeginTransformFeedback = gen7_begin_transform_feedback; 371 functions->EndTransformFeedback = gen7_end_transform_feedback; 372 functions->PauseTransformFeedback = gen7_pause_transform_feedback; 373 
functions->ResumeTransformFeedback = gen7_resume_transform_feedback; 374 functions->GetTransformFeedbackVertexCount = 375 brw_get_transform_feedback_vertex_count; 376 } else { 377 functions->BeginTransformFeedback = brw_begin_transform_feedback; 378 functions->EndTransformFeedback = brw_end_transform_feedback; 379 functions->PauseTransformFeedback = brw_pause_transform_feedback; 380 functions->ResumeTransformFeedback = brw_resume_transform_feedback; 381 functions->GetTransformFeedbackVertexCount = 382 brw_get_transform_feedback_vertex_count; 383 } 384 385 if (devinfo->gen >= 6) 386 functions->GetSamplePosition = gen6_get_sample_position; 387 388 /* GL_ARB_get_program_binary */ 389 brw_program_binary_init(brw->screen->deviceID); 390 functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1; 391 functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary; 392 functions->ProgramBinaryDeserializeDriverBlob = 393 brw_deserialize_program_binary; 394 395 if (brw->screen->disk_cache) { 396 functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir; 397 } 398 399 functions->SetBackgroundContext = brw_set_background_context; 400} 401 402static void 403brw_initialize_spirv_supported_capabilities(struct brw_context *brw) 404{ 405 const struct gen_device_info *devinfo = &brw->screen->devinfo; 406 struct gl_context *ctx = &brw->ctx; 407 408 /* The following SPIR-V capabilities are only supported on gen7+. In theory 409 * you should enable the extension only on gen7+, but just in case let's 410 * assert it. 
411 */ 412 assert(devinfo->gen >= 7); 413 414 ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7; 415 ctx->Const.SpirVCapabilities.draw_parameters = true; 416 ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8; 417 ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7; 418 ctx->Const.SpirVCapabilities.image_write_without_format = true; 419 ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8; 420 ctx->Const.SpirVCapabilities.tessellation = true; 421 ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7; 422 ctx->Const.SpirVCapabilities.variable_pointers = true; 423} 424 425static void 426brw_initialize_context_constants(struct brw_context *brw) 427{ 428 const struct gen_device_info *devinfo = &brw->screen->devinfo; 429 struct gl_context *ctx = &brw->ctx; 430 const struct brw_compiler *compiler = brw->screen->compiler; 431 432 const bool stage_exists[MESA_SHADER_STAGES] = { 433 [MESA_SHADER_VERTEX] = true, 434 [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7, 435 [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7, 436 [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6, 437 [MESA_SHADER_FRAGMENT] = true, 438 [MESA_SHADER_COMPUTE] = 439 (_mesa_is_desktop_gl(ctx) && 440 ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) || 441 (ctx->API == API_OPENGLES2 && 442 ctx->Const.MaxComputeWorkGroupSize[0] >= 128), 443 }; 444 445 unsigned num_stages = 0; 446 for (int i = 0; i < MESA_SHADER_STAGES; i++) { 447 if (stage_exists[i]) 448 num_stages++; 449 } 450 451 unsigned max_samplers = 452 devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16; 453 454 ctx->Const.MaxDualSourceDrawBuffers = 1; 455 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; 456 ctx->Const.MaxCombinedShaderOutputResources = 457 MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS; 458 459 /* The timestamp register we can read for glGetTimestamp() is 460 * sometimes only 32 bits, before scaling to nanoseconds (depending 461 * on kernel). 
462 * 463 * Once scaled to nanoseconds the timestamp would roll over at a 464 * non-power-of-two, so an application couldn't use 465 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we 466 * report 36 bits and truncate at that (rolling over 5 times as 467 * often as the HW counter), and when the 32-bit counter rolls 468 * over, it happens to also be at a rollover in the reported value 469 * from near (1<<36) to 0. 470 * 471 * The low 32 bits rolls over in ~343 seconds. Our 36-bit result 472 * rolls over every ~69 seconds. 473 */ 474 ctx->Const.QueryCounterBits.Timestamp = 36; 475 476 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ 477 ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; 478 if (devinfo->gen >= 7) { 479 ctx->Const.MaxRenderbufferSize = 16384; 480 ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS); 481 ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */ 482 } else { 483 ctx->Const.MaxRenderbufferSize = 8192; 484 ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS); 485 ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ 486 } 487 ctx->Const.Max3DTextureLevels = 12; /* 2048 */ 488 ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512; 489 ctx->Const.MaxTextureMbytes = 1536; 490 ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 
16384 : 8192; 491 ctx->Const.MaxTextureMaxAnisotropy = 16.0; 492 ctx->Const.MaxTextureLodBias = 15.0; 493 ctx->Const.StripTextureBorder = true; 494 if (devinfo->gen >= 7) { 495 ctx->Const.MaxProgramTextureGatherComponents = 4; 496 ctx->Const.MinProgramTextureGatherOffset = -32; 497 ctx->Const.MaxProgramTextureGatherOffset = 31; 498 } else if (devinfo->gen == 6) { 499 ctx->Const.MaxProgramTextureGatherComponents = 1; 500 ctx->Const.MinProgramTextureGatherOffset = -8; 501 ctx->Const.MaxProgramTextureGatherOffset = 7; 502 } 503 504 ctx->Const.MaxUniformBlockSize = 65536; 505 506 for (int i = 0; i < MESA_SHADER_STAGES; i++) { 507 struct gl_program_constants *prog = &ctx->Const.Program[i]; 508 509 if (!stage_exists[i]) 510 continue; 511 512 prog->MaxTextureImageUnits = max_samplers; 513 514 prog->MaxUniformBlocks = BRW_MAX_UBO; 515 prog->MaxCombinedUniformComponents = 516 prog->MaxUniformComponents + 517 ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks; 518 519 prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS; 520 prog->MaxAtomicBuffers = BRW_MAX_ABO; 521 prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0; 522 prog->MaxShaderStorageBlocks = BRW_MAX_SSBO; 523 } 524 525 ctx->Const.MaxTextureUnits = 526 MIN2(ctx->Const.MaxTextureCoordUnits, 527 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits); 528 529 ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO; 530 ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO; 531 ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO; 532 ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO; 533 ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO; 534 ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers; 535 ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES; 536 537 538 /* Hardware only supports a limited number of transform feedback buffers. 
539 * So we need to override the Mesa default (which is based only on software 540 * limits). 541 */ 542 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS; 543 544 /* On Gen6, in the worst case, we use up one binding table entry per 545 * transform feedback component (see comments above the definition of 546 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value 547 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to 548 * BRW_MAX_SOL_BINDINGS. 549 * 550 * In "separate components" mode, we need to divide this value by 551 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries 552 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS. 553 */ 554 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS; 555 ctx->Const.MaxTransformFeedbackSeparateComponents = 556 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS; 557 558 ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = 559 !can_do_mi_math_and_lrr(brw->screen); 560 561 int max_samples; 562 const int *msaa_modes = intel_supported_msaa_modes(brw->screen); 563 const int clamp_max_samples = 564 driQueryOptioni(&brw->optionCache, "clamp_max_samples"); 565 566 if (clamp_max_samples < 0) { 567 max_samples = msaa_modes[0]; 568 } else { 569 /* Select the largest supported MSAA mode that does not exceed 570 * clamp_max_samples. 571 */ 572 max_samples = 0; 573 for (int i = 0; msaa_modes[i] != 0; ++i) { 574 if (msaa_modes[i] <= clamp_max_samples) { 575 max_samples = msaa_modes[i]; 576 break; 577 } 578 } 579 } 580 581 ctx->Const.MaxSamples = max_samples; 582 ctx->Const.MaxColorTextureSamples = max_samples; 583 ctx->Const.MaxDepthTextureSamples = max_samples; 584 ctx->Const.MaxIntegerSamples = max_samples; 585 ctx->Const.MaxImageSamples = 0; 586 587 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used 588 * to map indices of rectangular grid to sample numbers within a pixel. 
589 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled 590 * extension implementation. For more details see the comment above 591 * gen6_set_sample_maps() definition. 592 */ 593 gen6_set_sample_maps(ctx); 594 595 ctx->Const.MinLineWidth = 1.0; 596 ctx->Const.MinLineWidthAA = 1.0; 597 if (devinfo->gen >= 6) { 598 ctx->Const.MaxLineWidth = 7.375; 599 ctx->Const.MaxLineWidthAA = 7.375; 600 ctx->Const.LineWidthGranularity = 0.125; 601 } else { 602 ctx->Const.MaxLineWidth = 7.0; 603 ctx->Const.MaxLineWidthAA = 7.0; 604 ctx->Const.LineWidthGranularity = 0.5; 605 } 606 607 /* For non-antialiased lines, we have to round the line width to the 608 * nearest whole number. Make sure that we don't advertise a line 609 * width that, when rounded, will be beyond the actual hardware 610 * maximum. 611 */ 612 assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth); 613 614 ctx->Const.MinPointSize = 1.0; 615 ctx->Const.MinPointSizeAA = 1.0; 616 ctx->Const.MaxPointSize = 255.0; 617 ctx->Const.MaxPointSizeAA = 255.0; 618 ctx->Const.PointSizeGranularity = 1.0; 619 620 if (devinfo->gen >= 5 || devinfo->is_g4x) 621 ctx->Const.MaxClipPlanes = 8; 622 623 ctx->Const.GLSLTessLevelsAsInputs = true; 624 ctx->Const.PrimitiveRestartForPatches = true; 625 626 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024; 627 ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0; 628 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0; 629 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0; 630 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0; 631 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0; 632 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0; 633 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16; 634 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256; 635 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1; 636 
ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024; 637 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams = 638 MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters, 639 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams); 640 641 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024; 642 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024; 643 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024; 644 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024; 645 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12; 646 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256; 647 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0; 648 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024; 649 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams = 650 MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters, 651 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams); 652 653 /* Fragment shaders use real, 32-bit twos-complement integers for all 654 * integer types. 
655 */ 656 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31; 657 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30; 658 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0; 659 ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; 660 ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt; 661 662 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31; 663 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30; 664 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0; 665 ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; 666 ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt; 667 668 /* Gen6 converts quads to polygon in beginning of 3D pipeline, 669 * but we're not sure how it's actually done for vertex order, 670 * that affect provoking vertex decision. Always use last vertex 671 * convention for quad primitive which works as expected for now. 672 */ 673 if (devinfo->gen >= 6) 674 ctx->Const.QuadsFollowProvokingVertexConvention = false; 675 676 ctx->Const.NativeIntegers = true; 677 678 /* Regarding the CMP instruction, the Ivybridge PRM says: 679 * 680 * "For each enabled channel 0b or 1b is assigned to the appropriate flag 681 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord 682 * 0xFFFFFFFF) is assigned to dst." 683 * 684 * but PRMs for earlier generations say 685 * 686 * "In dword format, one GRF may store up to 8 results. When the register 687 * is used later as a vector of Booleans, as only LSB at each channel 688 * contains meaning [sic] data, software should make sure all higher bits 689 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)." 690 * 691 * We select the representation of a true boolean uniform to be ~0, and fix 692 * the results of Gen <= 5 CMP instruction's with -(result & 1). 
693 */ 694 ctx->Const.UniformBooleanTrue = ~0; 695 696 /* From the gen4 PRM, volume 4 page 127: 697 * 698 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies 699 * the base address of the first element of the surface, computed in 700 * software by adding the surface base address to the byte offset of 701 * the element in the buffer." 702 * 703 * However, unaligned accesses are slower, so enforce buffer alignment. 704 * 705 * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional 706 * restriction: the start of the buffer needs to be 32B aligned. 707 */ 708 ctx->Const.UniformBufferOffsetAlignment = 32; 709 710 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so 711 * that we can safely have the CPU and GPU writing the same SSBO on 712 * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never 713 * writes, so there's no problem. For an SSBO, the GPU and the CPU can 714 * be updating disjoint regions of the buffer simultaneously and that will 715 * break if the regions overlap the same cacheline. 716 */ 717 ctx->Const.ShaderStorageBufferOffsetAlignment = 64; 718 ctx->Const.TextureBufferOffsetAlignment = 16; 719 ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024; 720 721 if (devinfo->gen >= 6) { 722 ctx->Const.MaxVarying = 32; 723 ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128; 724 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 725 compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 
128 : 64; 726 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128; 727 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128; 728 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128; 729 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128; 730 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128; 731 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128; 732 } 733 734 /* We want the GLSL compiler to emit code that uses condition codes */ 735 for (int i = 0; i < MESA_SHADER_STAGES; i++) { 736 ctx->Const.ShaderCompilerOptions[i] = 737 brw->screen->compiler->glsl_compiler_options[i]; 738 } 739 740 if (devinfo->gen >= 7) { 741 ctx->Const.MaxViewportWidth = 32768; 742 ctx->Const.MaxViewportHeight = 32768; 743 } 744 745 /* ARB_viewport_array, OES_viewport_array */ 746 if (devinfo->gen >= 6) { 747 ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS; 748 ctx->Const.ViewportSubpixelBits = 8; 749 750 /* Cast to float before negating because MaxViewportWidth is unsigned. 751 */ 752 ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth; 753 ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth; 754 } 755 756 /* ARB_gpu_shader5 */ 757 if (devinfo->gen >= 7) 758 ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS); 759 760 /* ARB_framebuffer_no_attachments */ 761 ctx->Const.MaxFramebufferWidth = 16384; 762 ctx->Const.MaxFramebufferHeight = 16384; 763 ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers; 764 ctx->Const.MaxFramebufferSamples = max_samples; 765 766 /* OES_primitive_bounding_box */ 767 ctx->Const.NoPrimitiveBoundingBoxOutput = true; 768 769 /* TODO: We should be able to use STD430 packing by default on all hardware 770 * but some piglit tests [1] currently fail on SNB when this is enabled. 
771 * The problem is the messages we're using for doing uniform pulls 772 * in the vec4 back-end on SNB is the OWORD block load instruction, which 773 * takes its offset in units of OWORDS (16 bytes). On IVB+, we use the 774 * sampler which doesn't have these restrictions. 775 * 776 * In the scalar back-end, we use the sampler for dynamic uniform loads and 777 * pull an entire cache line at a time for constant offset loads both of 778 * which support almost any alignment. 779 * 780 * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test 781 */ 782 if (devinfo->gen >= 7) 783 ctx->Const.UseSTD430AsDefaultPacking = true; 784 785 if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) 786 ctx->Const.AllowMappedBuffersDuringExecution = true; 787 788 /* GL_ARB_get_program_binary */ 789 ctx->Const.NumProgramBinaryFormats = 1; 790} 791 792static void 793brw_initialize_cs_context_constants(struct brw_context *brw) 794{ 795 struct gl_context *ctx = &brw->ctx; 796 const struct intel_screen *screen = brw->screen; 797 struct gen_device_info *devinfo = &brw->screen->devinfo; 798 799 /* FINISHME: Do this for all platforms that the kernel supports */ 800 if (devinfo->is_cherryview && 801 screen->subslice_total > 0 && screen->eu_total > 0) { 802 /* Logical CS threads = EUs per subslice * 7 threads per EU */ 803 uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7; 804 805 /* Fuse configurations may give more threads than expected, never less. */ 806 if (max_cs_threads > devinfo->max_cs_threads) 807 devinfo->max_cs_threads = max_cs_threads; 808 } 809 810 /* Maximum number of scalar compute shader invocations that can be run in 811 * parallel in the same subslice assuming SIMD32 dispatch. 812 * 813 * We don't advertise more than 64 threads, because we are limited to 64 by 814 * our usage of thread_width_max in the gpgpu walker command. This only 815 * currently impacts Haswell, which otherwise might be able to advertise 70 816 * threads. 
With SIMD32 and 64 threads, Haswell still provides twice the 817 * required the number of invocation needed for ARB_compute_shader. 818 */ 819 const unsigned max_threads = MIN2(64, devinfo->max_cs_threads); 820 const uint32_t max_invocations = 32 * max_threads; 821 ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations; 822 ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations; 823 ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations; 824 ctx->Const.MaxComputeWorkGroupInvocations = max_invocations; 825 ctx->Const.MaxComputeSharedMemorySize = 64 * 1024; 826} 827 828/** 829 * Process driconf (drirc) options, setting appropriate context flags. 830 * 831 * intelInitExtensions still pokes at optionCache directly, in order to 832 * avoid advertising various extensions. No flags are set, so it makes 833 * sense to continue doing that there. 834 */ 835static void 836brw_process_driconf_options(struct brw_context *brw) 837{ 838 const struct gen_device_info *devinfo = &brw->screen->devinfo; 839 struct gl_context *ctx = &brw->ctx; 840 841 driOptionCache *options = &brw->optionCache; 842 driParseConfigFiles(options, &brw->screen->optionCache, 843 brw->driContext->driScreenPriv->myNum, 844 "i965", NULL); 845 846 int bo_reuse_mode = driQueryOptioni(options, "bo_reuse"); 847 switch (bo_reuse_mode) { 848 case DRI_CONF_BO_REUSE_DISABLED: 849 break; 850 case DRI_CONF_BO_REUSE_ALL: 851 brw_bufmgr_enable_reuse(brw->bufmgr); 852 break; 853 } 854 855 if (INTEL_DEBUG & DEBUG_NO_HIZ) { 856 brw->has_hiz = false; 857 /* On gen6, you can only do separate stencil with HIZ. 
*/ 858 if (devinfo->gen == 6) 859 brw->has_separate_stencil = false; 860 } 861 862 if (driQueryOptionb(options, "mesa_no_error")) 863 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR; 864 865 if (driQueryOptionb(options, "always_flush_batch")) { 866 fprintf(stderr, "flushing batchbuffer before/after each draw call\n"); 867 brw->always_flush_batch = true; 868 } 869 870 if (driQueryOptionb(options, "always_flush_cache")) { 871 fprintf(stderr, "flushing GPU caches before/after each draw call\n"); 872 brw->always_flush_cache = true; 873 } 874 875 if (driQueryOptionb(options, "disable_throttling")) { 876 fprintf(stderr, "disabling flush throttling\n"); 877 brw->disable_throttling = true; 878 } 879 880 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile"); 881 882 if (driQueryOptionb(&brw->optionCache, "precise_trig")) 883 brw->screen->compiler->precise_trig = true; 884 885 ctx->Const.ForceGLSLExtensionsWarn = 886 driQueryOptionb(options, "force_glsl_extensions_warn"); 887 888 ctx->Const.ForceGLSLVersion = 889 driQueryOptioni(options, "force_glsl_version"); 890 891 ctx->Const.DisableGLSLLineContinuations = 892 driQueryOptionb(options, "disable_glsl_line_continuations"); 893 894 ctx->Const.AllowGLSLExtensionDirectiveMidShader = 895 driQueryOptionb(options, "allow_glsl_extension_directive_midshader"); 896 897 ctx->Const.AllowGLSLBuiltinVariableRedeclaration = 898 driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration"); 899 900 ctx->Const.AllowHigherCompatVersion = 901 driQueryOptionb(options, "allow_higher_compat_version"); 902 903 ctx->Const.ForceGLSLAbsSqrt = 904 driQueryOptionb(options, "force_glsl_abs_sqrt"); 905 906 ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init"); 907 908 brw->dual_color_blend_by_location = 909 driQueryOptionb(options, "dual_color_blend_by_location"); 910 911 ctx->Const.AllowGLSLCrossStageInterpolationMismatch = 912 driQueryOptionb(options, 
"allow_glsl_cross_stage_interpolation_mismatch"); 913 914 ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20); 915 driComputeOptionsSha1(&brw->screen->optionCache, 916 ctx->Const.dri_config_options_sha1); 917} 918 919GLboolean 920brwCreateContext(gl_api api, 921 const struct gl_config *mesaVis, 922 __DRIcontext *driContextPriv, 923 const struct __DriverContextConfig *ctx_config, 924 unsigned *dri_ctx_error, 925 void *sharedContextPrivate) 926{ 927 struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate; 928 struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate; 929 const struct gen_device_info *devinfo = &screen->devinfo; 930 struct dd_function_table functions; 931 932 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel 933 * provides us with context reset notifications. 934 */ 935 uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG | 936 __DRI_CTX_FLAG_FORWARD_COMPATIBLE | 937 __DRI_CTX_FLAG_NO_ERROR; 938 939 if (screen->has_context_reset_notification) 940 allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS; 941 942 if (ctx_config->flags & ~allowed_flags) { 943 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG; 944 return false; 945 } 946 947 if (ctx_config->attribute_mask & 948 ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY | 949 __DRIVER_CONTEXT_ATTRIB_PRIORITY)) { 950 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE; 951 return false; 952 } 953 954 bool notify_reset = 955 ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) && 956 ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION); 957 958 struct brw_context *brw = rzalloc(NULL, struct brw_context); 959 if (!brw) { 960 fprintf(stderr, "%s: failed to alloc context\n", __func__); 961 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; 962 return false; 963 } 964 965 driContextPriv->driverPrivate = brw; 966 brw->driContext = driContextPriv; 967 brw->screen = screen; 968 brw->bufmgr = screen->bufmgr; 969 970 brw->has_hiz = 
devinfo->has_hiz_and_separate_stencil; 971 brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil; 972 973 brw->has_swizzling = screen->hw_has_swizzling; 974 975 brw->isl_dev = screen->isl_dev; 976 977 brw->vs.base.stage = MESA_SHADER_VERTEX; 978 brw->tcs.base.stage = MESA_SHADER_TESS_CTRL; 979 brw->tes.base.stage = MESA_SHADER_TESS_EVAL; 980 brw->gs.base.stage = MESA_SHADER_GEOMETRY; 981 brw->wm.base.stage = MESA_SHADER_FRAGMENT; 982 brw->cs.base.stage = MESA_SHADER_COMPUTE; 983 984 brw_init_driver_functions(brw, &functions); 985 986 if (notify_reset) 987 functions.GetGraphicsResetStatus = brw_get_graphics_reset_status; 988 989 struct gl_context *ctx = &brw->ctx; 990 991 if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) { 992 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; 993 fprintf(stderr, "%s: failed to init mesa context\n", __func__); 994 intelDestroyContext(driContextPriv); 995 return false; 996 } 997 998 driContextSetFlags(ctx, ctx_config->flags); 999 1000 /* Initialize the software rasterizer and helper modules. 1001 * 1002 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for 1003 * software fallbacks (which we have to support on legacy GL to do weird 1004 * glDrawPixels(), glBitmap(), and other functions). 
1005 */ 1006 if (api != API_OPENGL_CORE && api != API_OPENGLES2) { 1007 _swrast_CreateContext(ctx); 1008 } 1009 1010 _vbo_CreateContext(ctx); 1011 if (ctx->swrast_context) { 1012 _tnl_CreateContext(ctx); 1013 TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; 1014 _swsetup_CreateContext(ctx); 1015 1016 /* Configure swrast to match hardware characteristics: */ 1017 _swrast_allow_pixel_fog(ctx, false); 1018 _swrast_allow_vertex_fog(ctx, true); 1019 } 1020 1021 _mesa_meta_init(ctx); 1022 1023 brw_process_driconf_options(brw); 1024 1025 if (INTEL_DEBUG & DEBUG_PERF) 1026 brw->perf_debug = true; 1027 1028 brw_initialize_cs_context_constants(brw); 1029 brw_initialize_context_constants(brw); 1030 1031 ctx->Const.ResetStrategy = notify_reset 1032 ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB; 1033 1034 /* Reinitialize the context point state. It depends on ctx->Const values. */ 1035 _mesa_init_point(ctx); 1036 1037 intel_fbo_init(brw); 1038 1039 intel_batchbuffer_init(brw); 1040 1041 /* Create a new hardware context. Using a hardware context means that 1042 * our GPU state will be saved/restored on context switch, allowing us 1043 * to assume that the GPU is in the same state we left it in. 1044 * 1045 * This is required for transform feedback buffer offsets, query objects, 1046 * and also allows us to reduce how much state we have to emit. 
1047 */ 1048 brw->hw_ctx = brw_create_hw_context(brw->bufmgr); 1049 if (!brw->hw_ctx && devinfo->gen >= 6) { 1050 fprintf(stderr, "Failed to create hardware context.\n"); 1051 intelDestroyContext(driContextPriv); 1052 return false; 1053 } 1054 1055 if (brw->hw_ctx) { 1056 int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY; 1057 if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) { 1058 switch (ctx_config->priority) { 1059 case __DRI_CTX_PRIORITY_LOW: 1060 hw_priority = GEN_CONTEXT_LOW_PRIORITY; 1061 break; 1062 case __DRI_CTX_PRIORITY_HIGH: 1063 hw_priority = GEN_CONTEXT_HIGH_PRIORITY; 1064 break; 1065 } 1066 } 1067 if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY && 1068 brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) { 1069 fprintf(stderr, 1070 "Failed to set priority [%d:%d] for hardware context.\n", 1071 ctx_config->priority, hw_priority); 1072 intelDestroyContext(driContextPriv); 1073 return false; 1074 } 1075 } 1076 1077 if (brw_init_pipe_control(brw, devinfo)) { 1078 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; 1079 intelDestroyContext(driContextPriv); 1080 return false; 1081 } 1082 1083 brw_upload_init(&brw->upload, brw->bufmgr, 65536); 1084 1085 brw_init_state(brw); 1086 1087 intelInitExtensions(ctx); 1088 1089 brw_init_surface_formats(brw); 1090 1091 brw_blorp_init(brw); 1092 1093 brw->urb.size = devinfo->urb.size; 1094 1095 if (devinfo->gen == 6) 1096 brw->urb.gs_present = false; 1097 1098 brw->prim_restart.in_progress = false; 1099 brw->prim_restart.enable_cut_index = false; 1100 brw->gs.enabled = false; 1101 brw->clip.viewport_count = 1; 1102 1103 brw->predicate.state = BRW_PREDICATE_STATE_RENDER; 1104 1105 brw->max_gtt_map_object_size = screen->max_gtt_map_object_size; 1106 1107 ctx->VertexProgram._MaintainTnlProgram = true; 1108 ctx->FragmentProgram._MaintainTexEnvProgram = true; 1109 1110 brw_draw_init( brw ); 1111 1112 if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) { 1113 /* Turn on some extra GL_ARB_debug_output 
generation. */ 1114 brw->perf_debug = true; 1115 } 1116 1117 if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) { 1118 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB; 1119 ctx->Const.RobustAccess = GL_TRUE; 1120 } 1121 1122 if (INTEL_DEBUG & DEBUG_SHADER_TIME) 1123 brw_init_shader_time(brw); 1124 1125 _mesa_override_extensions(ctx); 1126 _mesa_compute_version(ctx); 1127 1128 /* GL_ARB_gl_spirv */ 1129 if (ctx->Extensions.ARB_gl_spirv) 1130 brw_initialize_spirv_supported_capabilities(brw); 1131 1132 _mesa_initialize_dispatch_tables(ctx); 1133 _mesa_initialize_vbo_vtxfmt(ctx); 1134 1135 if (ctx->Extensions.INTEL_performance_query) 1136 brw_init_performance_queries(brw); 1137 1138 vbo_use_buffer_objects(ctx); 1139 vbo_always_unmap_buffers(ctx); 1140 1141 brw->ctx.Cache = brw->screen->disk_cache; 1142 1143 if (driContextPriv->driScreenPriv->dri2.backgroundCallable && 1144 driQueryOptionb(&screen->optionCache, "mesa_glthread")) { 1145 /* Loader supports multithreading, and so do we. */ 1146 _mesa_glthread_init(ctx); 1147 } 1148 1149 return true; 1150} 1151 1152void 1153intelDestroyContext(__DRIcontext * driContextPriv) 1154{ 1155 struct brw_context *brw = 1156 (struct brw_context *) driContextPriv->driverPrivate; 1157 struct gl_context *ctx = &brw->ctx; 1158 1159 GET_CURRENT_CONTEXT(curctx); 1160 1161 if (curctx == NULL) { 1162 /* No current context, but we need one to release 1163 * renderbuffer surface when we release framebuffer. 1164 * So temporarily bind the context. 1165 */ 1166 _mesa_make_current(ctx, NULL, NULL); 1167 } 1168 1169 _mesa_glthread_destroy(&brw->ctx); 1170 1171 _mesa_meta_free(&brw->ctx); 1172 1173 if (INTEL_DEBUG & DEBUG_SHADER_TIME) { 1174 /* Force a report. 
*/ 1175 brw->shader_time.report_time = 0; 1176 1177 brw_collect_and_report_shader_time(brw); 1178 brw_destroy_shader_time(brw); 1179 } 1180 1181 blorp_finish(&brw->blorp); 1182 1183 brw_destroy_state(brw); 1184 brw_draw_destroy(brw); 1185 1186 brw_bo_unreference(brw->curbe.curbe_bo); 1187 1188 brw_bo_unreference(brw->vs.base.scratch_bo); 1189 brw_bo_unreference(brw->tcs.base.scratch_bo); 1190 brw_bo_unreference(brw->tes.base.scratch_bo); 1191 brw_bo_unreference(brw->gs.base.scratch_bo); 1192 brw_bo_unreference(brw->wm.base.scratch_bo); 1193 1194 brw_bo_unreference(brw->vs.base.push_const_bo); 1195 brw_bo_unreference(brw->tcs.base.push_const_bo); 1196 brw_bo_unreference(brw->tes.base.push_const_bo); 1197 brw_bo_unreference(brw->gs.base.push_const_bo); 1198 brw_bo_unreference(brw->wm.base.push_const_bo); 1199 1200 brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx); 1201 1202 if (ctx->swrast_context) { 1203 _swsetup_DestroyContext(&brw->ctx); 1204 _tnl_DestroyContext(&brw->ctx); 1205 } 1206 _vbo_DestroyContext(&brw->ctx); 1207 1208 if (ctx->swrast_context) 1209 _swrast_DestroyContext(&brw->ctx); 1210 1211 brw_fini_pipe_control(brw); 1212 intel_batchbuffer_free(&brw->batch); 1213 1214 brw_bo_unreference(brw->throttle_batch[1]); 1215 brw_bo_unreference(brw->throttle_batch[0]); 1216 brw->throttle_batch[1] = NULL; 1217 brw->throttle_batch[0] = NULL; 1218 1219 driDestroyOptionCache(&brw->optionCache); 1220 1221 /* free the Mesa context */ 1222 _mesa_free_context_data(&brw->ctx, true); 1223 1224 ralloc_free(brw); 1225 driContextPriv->driverPrivate = NULL; 1226} 1227 1228GLboolean 1229intelUnbindContext(__DRIcontext * driContextPriv) 1230{ 1231 struct gl_context *ctx = driContextPriv->driverPrivate; 1232 _mesa_glthread_finish(ctx); 1233 1234 /* Unset current context and dispath table */ 1235 _mesa_make_current(NULL, NULL, NULL); 1236 1237 return true; 1238} 1239 1240/** 1241 * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior 1242 * on window system 
 * framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual), we go turn that back off before anyone
 * finds out.
1267 */ 1268static void 1269intel_gles3_srgb_workaround(struct brw_context *brw, 1270 struct gl_framebuffer *fb) 1271{ 1272 struct gl_context *ctx = &brw->ctx; 1273 1274 if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable) 1275 return; 1276 1277 for (int i = 0; i < BUFFER_COUNT; i++) { 1278 struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer; 1279 1280 /* Check if sRGB was specifically asked for. */ 1281 struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i); 1282 if (irb && irb->need_srgb) 1283 return; 1284 1285 if (rb) 1286 rb->Format = _mesa_get_srgb_format_linear(rb->Format); 1287 } 1288 /* Disable sRGB from framebuffers that are not compatible. */ 1289 fb->Visual.sRGBCapable = false; 1290} 1291 1292GLboolean 1293intelMakeCurrent(__DRIcontext * driContextPriv, 1294 __DRIdrawable * driDrawPriv, 1295 __DRIdrawable * driReadPriv) 1296{ 1297 struct brw_context *brw; 1298 1299 if (driContextPriv) 1300 brw = (struct brw_context *) driContextPriv->driverPrivate; 1301 else 1302 brw = NULL; 1303 1304 if (driContextPriv) { 1305 struct gl_context *ctx = &brw->ctx; 1306 struct gl_framebuffer *fb, *readFb; 1307 1308 if (driDrawPriv == NULL) { 1309 fb = _mesa_get_incomplete_framebuffer(); 1310 } else { 1311 fb = driDrawPriv->driverPrivate; 1312 driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; 1313 } 1314 1315 if (driReadPriv == NULL) { 1316 readFb = _mesa_get_incomplete_framebuffer(); 1317 } else { 1318 readFb = driReadPriv->driverPrivate; 1319 driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; 1320 } 1321 1322 /* The sRGB workaround changes the renderbuffer's format. We must change 1323 * the format before the renderbuffer's miptree get's allocated, otherwise 1324 * the formats of the renderbuffer and its miptree will differ. 
1325 */ 1326 intel_gles3_srgb_workaround(brw, fb); 1327 intel_gles3_srgb_workaround(brw, readFb); 1328 1329 /* If the context viewport hasn't been initialized, force a call out to 1330 * the loader to get buffers so we have a drawable size for the initial 1331 * viewport. */ 1332 if (!brw->ctx.ViewportInitialized) 1333 intel_prepare_render(brw); 1334 1335 _mesa_make_current(ctx, fb, readFb); 1336 } else { 1337 GET_CURRENT_CONTEXT(ctx); 1338 _mesa_glthread_finish(ctx); 1339 _mesa_make_current(NULL, NULL, NULL); 1340 } 1341 1342 return true; 1343} 1344 1345void 1346intel_resolve_for_dri2_flush(struct brw_context *brw, 1347 __DRIdrawable *drawable) 1348{ 1349 const struct gen_device_info *devinfo = &brw->screen->devinfo; 1350 1351 if (devinfo->gen < 6) { 1352 /* MSAA and fast color clear are not supported, so don't waste time 1353 * checking whether a resolve is needed. 1354 */ 1355 return; 1356 } 1357 1358 struct gl_framebuffer *fb = drawable->driverPrivate; 1359 struct intel_renderbuffer *rb; 1360 1361 /* Usually, only the back buffer will need to be downsampled. However, 1362 * the front buffer will also need it if the user has rendered into it. 1363 */ 1364 static const gl_buffer_index buffers[2] = { 1365 BUFFER_BACK_LEFT, 1366 BUFFER_FRONT_LEFT, 1367 }; 1368 1369 for (int i = 0; i < 2; ++i) { 1370 rb = intel_get_renderbuffer(fb, buffers[i]); 1371 if (rb == NULL || rb->mt == NULL) 1372 continue; 1373 if (rb->mt->surf.samples == 1) { 1374 assert(rb->mt_layer == 0 && rb->mt_level == 0 && 1375 rb->layer_count == 1); 1376 intel_miptree_prepare_external(brw, rb->mt); 1377 } else { 1378 intel_renderbuffer_downsample(brw, rb); 1379 1380 /* Call prepare_external on the single-sample miptree to do any 1381 * needed resolves prior to handing it off to the window system. 1382 * This is needed in the case that rb->singlesample_mt is Y-tiled 1383 * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. 
In 1384 * this case, the MSAA resolve above will write compressed data into 1385 * rb->singlesample_mt. 1386 * 1387 * TODO: Some day, if we decide to care about the tiny performance 1388 * hit we're taking by doing the MSAA resolve and then a CCS resolve, 1389 * we could detect this case and just allocate the single-sampled 1390 * miptree without aux. However, that would be a lot of plumbing and 1391 * this is a rather exotic case so it's not really worth it. 1392 */ 1393 intel_miptree_prepare_external(brw, rb->singlesample_mt); 1394 } 1395 } 1396} 1397 1398static unsigned 1399intel_bits_per_pixel(const struct intel_renderbuffer *rb) 1400{ 1401 return _mesa_get_format_bytes(intel_rb_format(rb)) * 8; 1402} 1403 1404static void 1405intel_query_dri2_buffers(struct brw_context *brw, 1406 __DRIdrawable *drawable, 1407 __DRIbuffer **buffers, 1408 int *count); 1409 1410static void 1411intel_process_dri2_buffer(struct brw_context *brw, 1412 __DRIdrawable *drawable, 1413 __DRIbuffer *buffer, 1414 struct intel_renderbuffer *rb, 1415 const char *buffer_name); 1416 1417static void 1418intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable); 1419 1420static void 1421intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable) 1422{ 1423 struct gl_framebuffer *fb = drawable->driverPrivate; 1424 struct intel_renderbuffer *rb; 1425 __DRIbuffer *buffers = NULL; 1426 int count; 1427 const char *region_name; 1428 1429 /* Set this up front, so that in case our buffers get invalidated 1430 * while we're getting new buffers, we don't clobber the stamp and 1431 * thus ignore the invalidate. 
*/ 1432 drawable->lastStamp = drawable->dri2.stamp; 1433 1434 if (unlikely(INTEL_DEBUG & DEBUG_DRI)) 1435 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable); 1436 1437 intel_query_dri2_buffers(brw, drawable, &buffers, &count); 1438 1439 if (buffers == NULL) 1440 return; 1441 1442 for (int i = 0; i < count; i++) { 1443 switch (buffers[i].attachment) { 1444 case __DRI_BUFFER_FRONT_LEFT: 1445 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); 1446 region_name = "dri2 front buffer"; 1447 break; 1448 1449 case __DRI_BUFFER_FAKE_FRONT_LEFT: 1450 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); 1451 region_name = "dri2 fake front buffer"; 1452 break; 1453 1454 case __DRI_BUFFER_BACK_LEFT: 1455 rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); 1456 region_name = "dri2 back buffer"; 1457 break; 1458 1459 case __DRI_BUFFER_DEPTH: 1460 case __DRI_BUFFER_HIZ: 1461 case __DRI_BUFFER_DEPTH_STENCIL: 1462 case __DRI_BUFFER_STENCIL: 1463 case __DRI_BUFFER_ACCUM: 1464 default: 1465 fprintf(stderr, 1466 "unhandled buffer attach event, attachment type %d\n", 1467 buffers[i].attachment); 1468 return; 1469 } 1470 1471 intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name); 1472 } 1473 1474} 1475 1476void 1477intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) 1478{ 1479 struct brw_context *brw = context->driverPrivate; 1480 __DRIscreen *dri_screen = brw->screen->driScrnPriv; 1481 1482 /* Set this up front, so that in case our buffers get invalidated 1483 * while we're getting new buffers, we don't clobber the stamp and 1484 * thus ignore the invalidate. 
*/ 1485 drawable->lastStamp = drawable->dri2.stamp; 1486 1487 if (unlikely(INTEL_DEBUG & DEBUG_DRI)) 1488 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable); 1489 1490 if (dri_screen->image.loader) 1491 intel_update_image_buffers(brw, drawable); 1492 else 1493 intel_update_dri2_buffers(brw, drawable); 1494 1495 driUpdateFramebufferSize(&brw->ctx, drawable); 1496} 1497 1498/** 1499 * intel_prepare_render should be called anywhere that curent read/drawbuffer 1500 * state is required. 1501 */ 1502void 1503intel_prepare_render(struct brw_context *brw) 1504{ 1505 struct gl_context *ctx = &brw->ctx; 1506 __DRIcontext *driContext = brw->driContext; 1507 __DRIdrawable *drawable; 1508 1509 drawable = driContext->driDrawablePriv; 1510 if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) { 1511 if (drawable->lastStamp != drawable->dri2.stamp) 1512 intel_update_renderbuffers(driContext, drawable); 1513 driContext->dri2.draw_stamp = drawable->dri2.stamp; 1514 } 1515 1516 drawable = driContext->driReadablePriv; 1517 if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) { 1518 if (drawable->lastStamp != drawable->dri2.stamp) 1519 intel_update_renderbuffers(driContext, drawable); 1520 driContext->dri2.read_stamp = drawable->dri2.stamp; 1521 } 1522 1523 /* If we're currently rendering to the front buffer, the rendering 1524 * that will happen next will probably dirty the front buffer. So 1525 * mark it as dirty here. 1526 */ 1527 if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) 1528 brw->front_buffer_dirty = true; 1529 1530 if (brw->is_shared_buffer_bound) { 1531 /* Subsequent rendering will probably dirty the shared buffer. */ 1532 brw->is_shared_buffer_dirty = true; 1533 } 1534} 1535 1536/** 1537 * \brief Query DRI2 to obtain a DRIdrawable's buffers. 1538 * 1539 * To determine which DRI buffers to request, examine the renderbuffers 1540 * attached to the drawable's framebuffer. 
Then request the buffers with 1541 * DRI2GetBuffers() or DRI2GetBuffersWithFormat(). 1542 * 1543 * This is called from intel_update_renderbuffers(). 1544 * 1545 * \param drawable Drawable whose buffers are queried. 1546 * \param buffers [out] List of buffers returned by DRI2 query. 1547 * \param buffer_count [out] Number of buffers returned. 1548 * 1549 * \see intel_update_renderbuffers() 1550 * \see DRI2GetBuffers() 1551 * \see DRI2GetBuffersWithFormat() 1552 */ 1553static void 1554intel_query_dri2_buffers(struct brw_context *brw, 1555 __DRIdrawable *drawable, 1556 __DRIbuffer **buffers, 1557 int *buffer_count) 1558{ 1559 __DRIscreen *dri_screen = brw->screen->driScrnPriv; 1560 struct gl_framebuffer *fb = drawable->driverPrivate; 1561 int i = 0; 1562 unsigned attachments[8]; 1563 1564 struct intel_renderbuffer *front_rb; 1565 struct intel_renderbuffer *back_rb; 1566 1567 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); 1568 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); 1569 1570 memset(attachments, 0, sizeof(attachments)); 1571 if ((_mesa_is_front_buffer_drawing(fb) || 1572 _mesa_is_front_buffer_reading(fb) || 1573 !back_rb) && front_rb) { 1574 /* If a fake front buffer is in use, then querying for 1575 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from 1576 * the real front buffer to the fake front buffer. So before doing the 1577 * query, we need to make sure all the pending drawing has landed in the 1578 * real front buffer. 1579 */ 1580 intel_batchbuffer_flush(brw); 1581 intel_flush_front(&brw->ctx); 1582 1583 attachments[i++] = __DRI_BUFFER_FRONT_LEFT; 1584 attachments[i++] = intel_bits_per_pixel(front_rb); 1585 } else if (front_rb && brw->front_buffer_dirty) { 1586 /* We have pending front buffer rendering, but we aren't querying for a 1587 * front buffer. If the front buffer we have is a fake front buffer, 1588 * the X server is going to throw it away when it processes the query. 
1589 * So before doing the query, make sure all the pending drawing has 1590 * landed in the real front buffer. 1591 */ 1592 intel_batchbuffer_flush(brw); 1593 intel_flush_front(&brw->ctx); 1594 } 1595 1596 if (back_rb) { 1597 attachments[i++] = __DRI_BUFFER_BACK_LEFT; 1598 attachments[i++] = intel_bits_per_pixel(back_rb); 1599 } 1600 1601 assert(i <= ARRAY_SIZE(attachments)); 1602 1603 *buffers = 1604 dri_screen->dri2.loader->getBuffersWithFormat(drawable, 1605 &drawable->w, 1606 &drawable->h, 1607 attachments, i / 2, 1608 buffer_count, 1609 drawable->loaderPrivate); 1610} 1611 1612/** 1613 * \brief Assign a DRI buffer's DRM region to a renderbuffer. 1614 * 1615 * This is called from intel_update_renderbuffers(). 1616 * 1617 * \par Note: 1618 * DRI buffers whose attachment point is DRI2BufferStencil or 1619 * DRI2BufferDepthStencil are handled as special cases. 1620 * 1621 * \param buffer_name is a human readable name, such as "dri2 front buffer", 1622 * that is passed to brw_bo_gem_create_from_name(). 1623 * 1624 * \see intel_update_renderbuffers() 1625 */ 1626static void 1627intel_process_dri2_buffer(struct brw_context *brw, 1628 __DRIdrawable *drawable, 1629 __DRIbuffer *buffer, 1630 struct intel_renderbuffer *rb, 1631 const char *buffer_name) 1632{ 1633 struct gl_framebuffer *fb = drawable->driverPrivate; 1634 struct brw_bo *bo; 1635 1636 if (!rb) 1637 return; 1638 1639 unsigned num_samples = rb->Base.Base.NumSamples; 1640 1641 /* We try to avoid closing and reopening the same BO name, because the first 1642 * use of a mapping of the buffer involves a bunch of page faulting which is 1643 * moderately expensive. 1644 */ 1645 struct intel_mipmap_tree *last_mt; 1646 if (num_samples == 0) 1647 last_mt = rb->mt; 1648 else 1649 last_mt = rb->singlesample_mt; 1650 1651 uint32_t old_name = 0; 1652 if (last_mt) { 1653 /* The bo already has a name because the miptree was created by a 1654 * previous call to intel_process_dri2_buffer(). 
If a bo already has a 1655 * name, then brw_bo_flink() is a low-cost getter. It does not 1656 * create a new name. 1657 */ 1658 brw_bo_flink(last_mt->bo, &old_name); 1659 } 1660 1661 if (old_name == buffer->name) 1662 return; 1663 1664 if (unlikely(INTEL_DEBUG & DEBUG_DRI)) { 1665 fprintf(stderr, 1666 "attaching buffer %d, at %d, cpp %d, pitch %d\n", 1667 buffer->name, buffer->attachment, 1668 buffer->cpp, buffer->pitch); 1669 } 1670 1671 bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name, 1672 buffer->name); 1673 if (!bo) { 1674 fprintf(stderr, 1675 "Failed to open BO for returned DRI2 buffer " 1676 "(%dx%d, %s, named %d).\n" 1677 "This is likely a bug in the X Server that will lead to a " 1678 "crash soon.\n", 1679 drawable->w, drawable->h, buffer_name, buffer->name); 1680 return; 1681 } 1682 1683 uint32_t tiling, swizzle; 1684 brw_bo_get_tiling(bo, &tiling, &swizzle); 1685 1686 struct intel_mipmap_tree *mt = 1687 intel_miptree_create_for_bo(brw, 1688 bo, 1689 intel_rb_format(rb), 1690 0, 1691 drawable->w, 1692 drawable->h, 1693 1, 1694 buffer->pitch, 1695 isl_tiling_from_i915_tiling(tiling), 1696 MIPTREE_CREATE_DEFAULT); 1697 if (!mt) { 1698 brw_bo_unreference(bo); 1699 return; 1700 } 1701 1702 /* We got this BO from X11. We cana't assume that we have coherent texture 1703 * access because X may suddenly decide to use it for scan-out which would 1704 * destroy coherency. 
1705 */ 1706 bo->cache_coherent = false; 1707 1708 if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt, 1709 drawable->w, drawable->h, 1710 buffer->pitch)) { 1711 brw_bo_unreference(bo); 1712 intel_miptree_release(&mt); 1713 return; 1714 } 1715 1716 if (_mesa_is_front_buffer_drawing(fb) && 1717 (buffer->attachment == __DRI_BUFFER_FRONT_LEFT || 1718 buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) && 1719 rb->Base.Base.NumSamples > 1) { 1720 intel_renderbuffer_upsample(brw, rb); 1721 } 1722 1723 assert(rb->mt); 1724 1725 brw_bo_unreference(bo); 1726} 1727 1728/** 1729 * \brief Query DRI image loader to obtain a DRIdrawable's buffers. 1730 * 1731 * To determine which DRI buffers to request, examine the renderbuffers 1732 * attached to the drawable's framebuffer. Then request the buffers from 1733 * the image loader 1734 * 1735 * This is called from intel_update_renderbuffers(). 1736 * 1737 * \param drawable Drawable whose buffers are queried. 1738 * \param buffers [out] List of buffers returned by DRI2 query. 1739 * \param buffer_count [out] Number of buffers returned. 
1740 * 1741 * \see intel_update_renderbuffers() 1742 */ 1743 1744static void 1745intel_update_image_buffer(struct brw_context *intel, 1746 __DRIdrawable *drawable, 1747 struct intel_renderbuffer *rb, 1748 __DRIimage *buffer, 1749 enum __DRIimageBufferMask buffer_type) 1750{ 1751 struct gl_framebuffer *fb = drawable->driverPrivate; 1752 1753 if (!rb || !buffer->bo) 1754 return; 1755 1756 unsigned num_samples = rb->Base.Base.NumSamples; 1757 1758 /* Check and see if we're already bound to the right 1759 * buffer object 1760 */ 1761 struct intel_mipmap_tree *last_mt; 1762 if (num_samples == 0) 1763 last_mt = rb->mt; 1764 else 1765 last_mt = rb->singlesample_mt; 1766 1767 if (last_mt && last_mt->bo == buffer->bo) { 1768 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) { 1769 intel_miptree_make_shareable(intel, last_mt); 1770 } 1771 return; 1772 } 1773 1774 /* Only allow internal compression if samples == 0. For multisampled 1775 * window system buffers, the only thing the single-sampled buffer is used 1776 * for is as a resolve target. If we do any compression beyond what is 1777 * supported by the window system, we will just have to resolve so it's 1778 * probably better to just not bother. 1779 */ 1780 const bool allow_internal_aux = (num_samples == 0); 1781 1782 struct intel_mipmap_tree *mt = 1783 intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D, 1784 intel_rb_format(rb), 1785 allow_internal_aux); 1786 if (!mt) 1787 return; 1788 1789 if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt, 1790 buffer->width, buffer->height, 1791 buffer->pitch)) { 1792 intel_miptree_release(&mt); 1793 return; 1794 } 1795 1796 if (_mesa_is_front_buffer_drawing(fb) && 1797 buffer_type == __DRI_IMAGE_BUFFER_FRONT && 1798 rb->Base.Base.NumSamples > 1) { 1799 intel_renderbuffer_upsample(intel, rb); 1800 } 1801 1802 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) { 1803 /* The compositor and the application may access this image 1804 * concurrently. 
The display hardware may even scanout the image while 1805 * the GPU is rendering to it. Aux surfaces cause difficulty with 1806 * concurrent access, so permanently disable aux for this miptree. 1807 * 1808 * Perhaps we could improve overall application performance by 1809 * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to 1810 * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER 1811 * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this 1812 * approach to be highly dependent on the application's GL usage. 1813 * 1814 * I [chadv] expect clever disabling/reenabling to be counterproductive 1815 * in the use cases I care about: applications that render nearly 1816 * realtime handwriting to the surface while possibly undergiong 1817 * simultaneously scanout as a display plane. The app requires low 1818 * render latency. Even though the app spends most of its time in 1819 * shared-buffer mode, it also frequently transitions between 1820 * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER) 1821 * mode. Visual sutter during the transitions should be avoided. 1822 * 1823 * In this case, I [chadv] believe reducing the GPU workload at 1824 * shared-buffer/double-buffer transitions would offer a smoother app 1825 * experience than any savings due to aux compression. But I've 1826 * collected no data to prove my theory. 
1827 */ 1828 intel_miptree_make_shareable(intel, mt); 1829 } 1830} 1831 1832static void 1833intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable) 1834{ 1835 struct gl_framebuffer *fb = drawable->driverPrivate; 1836 __DRIscreen *dri_screen = brw->screen->driScrnPriv; 1837 struct intel_renderbuffer *front_rb; 1838 struct intel_renderbuffer *back_rb; 1839 struct __DRIimageList images; 1840 mesa_format format; 1841 uint32_t buffer_mask = 0; 1842 int ret; 1843 1844 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); 1845 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); 1846 1847 if (back_rb) 1848 format = intel_rb_format(back_rb); 1849 else if (front_rb) 1850 format = intel_rb_format(front_rb); 1851 else 1852 return; 1853 1854 if (front_rb && (_mesa_is_front_buffer_drawing(fb) || 1855 _mesa_is_front_buffer_reading(fb) || !back_rb)) { 1856 buffer_mask |= __DRI_IMAGE_BUFFER_FRONT; 1857 } 1858 1859 if (back_rb) 1860 buffer_mask |= __DRI_IMAGE_BUFFER_BACK; 1861 1862 ret = dri_screen->image.loader->getBuffers(drawable, 1863 driGLFormatToImageFormat(format), 1864 &drawable->dri2.stamp, 1865 drawable->loaderPrivate, 1866 buffer_mask, 1867 &images); 1868 if (!ret) 1869 return; 1870 1871 if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) { 1872 drawable->w = images.front->width; 1873 drawable->h = images.front->height; 1874 intel_update_image_buffer(brw, 1875 drawable, 1876 front_rb, 1877 images.front, 1878 __DRI_IMAGE_BUFFER_FRONT); 1879 } 1880 1881 if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) { 1882 drawable->w = images.back->width; 1883 drawable->h = images.back->height; 1884 intel_update_image_buffer(brw, 1885 drawable, 1886 back_rb, 1887 images.back, 1888 __DRI_IMAGE_BUFFER_BACK); 1889 } 1890 1891 if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) { 1892 assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED); 1893 drawable->w = images.back->width; 1894 drawable->h = images.back->height; 1895 intel_update_image_buffer(brw, 1896 
drawable, 1897 back_rb, 1898 images.back, 1899 __DRI_IMAGE_BUFFER_SHARED); 1900 brw->is_shared_buffer_bound = true; 1901 } else { 1902 brw->is_shared_buffer_bound = false; 1903 brw->is_shared_buffer_dirty = false; 1904 } 1905} 1906