1/* 2 * Copyright 2006 VMware, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26#include "main/enums.h" 27#include "main/macros.h" 28#include "main/mtypes.h" 29#include "main/fbobject.h" 30#include "main/framebuffer.h" 31#include "main/renderbuffer.h" 32#include "main/context.h" 33#include "main/teximage.h" 34#include "main/image.h" 35#include "main/condrender.h" 36#include "util/hash_table.h" 37#include "util/set.h" 38#include "util/u_memory.h" 39 40#include "swrast/swrast.h" 41#include "drivers/common/meta.h" 42 43#include "brw_batch.h" 44#include "brw_buffers.h" 45#include "brw_blit.h" 46#include "brw_fbo.h" 47#include "brw_mipmap_tree.h" 48#include "brw_image.h" 49#include "brw_screen.h" 50#include "brw_tex.h" 51#include "brw_context.h" 52#include "brw_defines.h" 53 54#define FILE_DEBUG_FLAG DEBUG_FBO 55 56/** Called by gl_renderbuffer::Delete() */ 57static void 58brw_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb) 59{ 60 struct brw_renderbuffer *irb = brw_renderbuffer(rb); 61 62 assert(irb); 63 64 brw_miptree_release(&irb->mt); 65 brw_miptree_release(&irb->singlesample_mt); 66 67 _mesa_delete_renderbuffer(ctx, rb); 68} 69 70/** 71 * \brief Downsample a winsys renderbuffer from mt to singlesample_mt. 72 * 73 * If the miptree needs no downsample, then skip. 74 */ 75void 76brw_renderbuffer_downsample(struct brw_context *brw, 77 struct brw_renderbuffer *irb) 78{ 79 if (!irb->need_downsample) 80 return; 81 brw_miptree_updownsample(brw, irb->mt, irb->singlesample_mt); 82 irb->need_downsample = false; 83} 84 85/** 86 * \brief Upsample a winsys renderbuffer from singlesample_mt to mt. 87 * 88 * The upsample is done unconditionally. 89 */ 90void 91brw_renderbuffer_upsample(struct brw_context *brw, 92 struct brw_renderbuffer *irb) 93{ 94 assert(!irb->need_downsample); 95 96 brw_miptree_updownsample(brw, irb->singlesample_mt, irb->mt); 97} 98 99/** 100 * \see dd_function_table::MapRenderbuffer 101 */ 102static void 103brw_map_renderbuffer(struct gl_context *ctx, 104 struct gl_renderbuffer *rb, 105 GLuint x, GLuint y, GLuint w, GLuint h, 106 GLbitfield mode, 107 GLubyte **out_map, 108 GLint *out_stride, 109 bool flip_y) 110{ 111 struct brw_context *brw = brw_context(ctx); 112 struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb; 113 struct brw_renderbuffer *irb = brw_renderbuffer(rb); 114 struct brw_mipmap_tree *mt; 115 void *map; 116 ptrdiff_t stride; 117 118 if (srb->Buffer) { 119 /* this is a malloc'd renderbuffer (accum buffer), not an irb */ 120 GLint bpp = _mesa_get_format_bytes(rb->Format); 121 GLint rowStride = srb->RowStride; 122 *out_map = (GLubyte *) srb->Buffer + y * rowStride + x * bpp; 123 *out_stride = rowStride; 124 return; 125 } 126 127 brw_prepare_render(brw); 128 129 /* The MapRenderbuffer API should always return a single-sampled mapping. 130 * The case we are asked to map multisampled RBs is in glReadPixels() (or 131 * swrast paths like glCopyTexImage()) from a window-system MSAA buffer, 132 * and GL expects an automatic resolve to happen. 133 * 134 * If it's a color miptree, there is a ->singlesample_mt which wraps the 135 * actual window system renderbuffer (which we may resolve to at any time), 136 * while the miptree itself is our driver-private allocation. If it's a 137 * depth or stencil miptree, we have a private MSAA buffer and no shared 138 * singlesample buffer, and since we don't expect anybody to ever actually 139 * resolve it, we just make a temporary singlesample buffer now when we 140 * have to. 141 */ 142 if (rb->NumSamples > 1) { 143 if (!irb->singlesample_mt) { 144 irb->singlesample_mt = 145 brw_miptree_create_for_renderbuffer(brw, irb->mt->format, 146 rb->Width, rb->Height, 147 1 /*num_samples*/); 148 if (!irb->singlesample_mt) 149 goto fail; 150 irb->singlesample_mt_is_tmp = true; 151 irb->need_downsample = true; 152 } 153 154 brw_renderbuffer_downsample(brw, irb); 155 mt = irb->singlesample_mt; 156 157 irb->need_map_upsample = mode & GL_MAP_WRITE_BIT; 158 } else { 159 mt = irb->mt; 160 } 161 162 /* For a window-system renderbuffer, we need to flip the mapping we receive 163 * upside-down. So we need to ask for a rectangle on flipped vertically, and 164 * we then return a pointer to the bottom of it with a negative stride. 165 */ 166 if (flip_y) { 167 y = rb->Height - y - h; 168 } 169 170 brw_miptree_map(brw, mt, irb->mt_level, irb->mt_layer, 171 x, y, w, h, mode, &map, &stride); 172 173 if (flip_y) { 174 map += (h - 1) * stride; 175 stride = -stride; 176 } 177 178 DBG("%s: rb %d (%s) mt mapped: (%d, %d) (%dx%d) -> %p/%"PRIdPTR"\n", 179 __func__, rb->Name, _mesa_get_format_name(rb->Format), 180 x, y, w, h, map, stride); 181 182 *out_map = map; 183 *out_stride = stride; 184 return; 185 186fail: 187 *out_map = NULL; 188 *out_stride = 0; 189} 190 191/** 192 * \see dd_function_table::UnmapRenderbuffer 193 */ 194static void 195brw_unmap_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb) 196{ 197 struct brw_context *brw = brw_context(ctx); 198 struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb; 199 struct brw_renderbuffer *irb = brw_renderbuffer(rb); 200 struct brw_mipmap_tree *mt; 201 202 DBG("%s: rb %d (%s)\n", __func__, 203 rb->Name, _mesa_get_format_name(rb->Format)); 204 205 if (srb->Buffer) { 206 /* this is a malloc'd renderbuffer (accum buffer) */ 207 /* nothing to do */ 208 return; 209 } 210 211 if (rb->NumSamples > 1) { 212 mt = irb->singlesample_mt; 213 } else { 214 mt = irb->mt; 215 } 216 217 brw_miptree_unmap(brw, mt, irb->mt_level, irb->mt_layer); 218 219 if (irb->need_map_upsample) { 220 brw_renderbuffer_upsample(brw, irb); 221 irb->need_map_upsample = false; 222 } 223 224 if (irb->singlesample_mt_is_tmp) 225 brw_miptree_release(&irb->singlesample_mt); 226} 227 228 229/** 230 * Round up the requested multisample count to the next supported sample size. 231 */ 232unsigned 233brw_quantize_num_samples(struct brw_screen *intel, unsigned num_samples) 234{ 235 const int *msaa_modes = brw_supported_msaa_modes(intel); 236 int quantized_samples = 0; 237 238 for (int i = 0; msaa_modes[i] != -1; ++i) { 239 if (msaa_modes[i] >= num_samples) 240 quantized_samples = msaa_modes[i]; 241 else 242 break; 243 } 244 245 return quantized_samples; 246} 247 248static mesa_format 249brw_renderbuffer_format(struct gl_context * ctx, GLenum internalFormat) 250{ 251 struct brw_context *brw = brw_context(ctx); 252 ASSERTED const struct intel_device_info *devinfo = &brw->screen->devinfo; 253 254 switch (internalFormat) { 255 default: 256 /* Use the same format-choice logic as for textures. 257 * Renderbuffers aren't any different from textures for us, 258 * except they're less useful because you can't texture with 259 * them. 260 */ 261 return ctx->Driver.ChooseTextureFormat(ctx, GL_TEXTURE_2D, 262 internalFormat, 263 GL_NONE, GL_NONE); 264 break; 265 case GL_STENCIL_INDEX: 266 case GL_STENCIL_INDEX1_EXT: 267 case GL_STENCIL_INDEX4_EXT: 268 case GL_STENCIL_INDEX8_EXT: 269 case GL_STENCIL_INDEX16_EXT: 270 /* These aren't actual texture formats, so force them here. */ 271 if (brw->has_separate_stencil) { 272 return MESA_FORMAT_S_UINT8; 273 } else { 274 assert(!devinfo->must_use_separate_stencil); 275 return MESA_FORMAT_Z24_UNORM_S8_UINT; 276 } 277 } 278} 279 280static GLboolean 281brw_alloc_private_renderbuffer_storage(struct gl_context *ctx, 282 struct gl_renderbuffer *rb, 283 GLenum internalFormat, 284 GLuint width, GLuint height) 285{ 286 struct brw_context *brw = brw_context(ctx); 287 struct brw_screen *screen = brw->screen; 288 struct brw_renderbuffer *irb = brw_renderbuffer(rb); 289 290 assert(rb->Format != MESA_FORMAT_NONE); 291 292 rb->NumSamples = brw_quantize_num_samples(screen, rb->NumSamples); 293 rb->NumStorageSamples = rb->NumSamples; 294 rb->Width = width; 295 rb->Height = height; 296 rb->_BaseFormat = _mesa_get_format_base_format(rb->Format); 297 298 brw_miptree_release(&irb->mt); 299 300 DBG("%s: %s: %s (%dx%d)\n", __func__, 301 _mesa_enum_to_string(internalFormat), 302 _mesa_get_format_name(rb->Format), width, height); 303 304 if (width == 0 || height == 0) 305 return true; 306 307 irb->mt = brw_miptree_create_for_renderbuffer(brw, rb->Format, 308 width, height, 309 MAX2(rb->NumSamples, 1)); 310 if (!irb->mt) 311 return false; 312 313 irb->layer_count = 1; 314 315 return true; 316} 317 318/** 319 * Called via glRenderbufferStorageEXT() to set the format and allocate 320 * storage for a user-created renderbuffer. 321 */ 322static GLboolean 323brw_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer *rb, 324 GLenum internalFormat, 325 GLuint width, GLuint height) 326{ 327 rb->Format = brw_renderbuffer_format(ctx, internalFormat); 328 return brw_alloc_private_renderbuffer_storage(ctx, rb, internalFormat, width, height); 329} 330 331static mesa_format 332fallback_rgbx_to_rgba(struct brw_screen *screen, struct gl_renderbuffer *rb, 333 mesa_format original_format) 334{ 335 mesa_format format = original_format; 336 337 /* The base format and internal format must be derived from the user-visible 338 * format (that is, the gl_config's format), even if we internally use 339 * choose a different format for the renderbuffer. Otherwise, rendering may 340 * use incorrect channel write masks. 341 */ 342 rb->_BaseFormat = _mesa_get_format_base_format(original_format); 343 rb->InternalFormat = rb->_BaseFormat; 344 345 if (!screen->mesa_format_supports_render[original_format]) { 346 /* The glRenderbufferStorage paths in core Mesa detect if the driver 347 * does not support the user-requested format, and then searches for 348 * a fallback format. The DRI code bypasses core Mesa, though. So we do 349 * the fallbacks here. 350 * 351 * We must support MESA_FORMAT_R8G8B8X8 on Android because the Android 352 * framework requires HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces. 353 */ 354 format = _mesa_format_fallback_rgbx_to_rgba(original_format); 355 assert(screen->mesa_format_supports_render[format]); 356 } 357 return format; 358} 359 360static void 361brw_image_target_renderbuffer_storage(struct gl_context *ctx, 362 struct gl_renderbuffer *rb, 363 void *image_handle) 364{ 365 struct brw_context *brw = brw_context(ctx); 366 struct brw_renderbuffer *irb; 367 __DRIscreen *dri_screen = brw->screen->driScrnPriv; 368 __DRIimage *image; 369 370 image = dri_screen->dri2.image->lookupEGLImage(dri_screen, image_handle, 371 dri_screen->loaderPrivate); 372 if (image == NULL) 373 return; 374 375 if (image->planar_format && image->planar_format->nplanes > 1) { 376 _mesa_error(ctx, GL_INVALID_OPERATION, 377 "glEGLImageTargetRenderbufferStorage(planar buffers are not " 378 "supported as render targets.)"); 379 return; 380 } 381 382 rb->Format = fallback_rgbx_to_rgba(brw->screen, rb, image->format); 383 384 mesa_format chosen_format = rb->Format == image->format ? 385 image->format : rb->Format; 386 387 /* __DRIimage is opaque to the core so it has to be checked here */ 388 if (!brw->mesa_format_supports_render[chosen_format]) { 389 _mesa_error(ctx, GL_INVALID_OPERATION, 390 "glEGLImageTargetRenderbufferStorage(unsupported image format)"); 391 return; 392 } 393 394 irb = brw_renderbuffer(rb); 395 brw_miptree_release(&irb->mt); 396 397 /* Disable creation of the miptree's aux buffers because the driver exposes 398 * no EGL API to manage them. That is, there is no API for resolving the aux 399 * buffer's content to the main buffer nor for invalidating the aux buffer's 400 * content. 401 */ 402 irb->mt = brw_miptree_create_for_dri_image(brw, image, GL_TEXTURE_2D, 403 rb->Format, false); 404 if (!irb->mt) 405 return; 406 407 rb->Width = image->width; 408 rb->Height = image->height; 409 rb->NeedsFinishRenderTexture = true; 410 irb->layer_count = 1; 411} 412 413/** 414 * Called by _mesa_resize_framebuffer() for each hardware renderbuffer when a 415 * window system framebuffer is resized. 416 * 417 * Any actual buffer reallocations for hardware renderbuffers (which would 418 * have triggered _mesa_resize_framebuffer()) were done by 419 * brw_process_dri2_buffer(). 420 */ 421static GLboolean 422brw_alloc_window_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, 423 GLenum internalFormat, GLuint width, GLuint height) 424{ 425 (void) ctx; 426 assert(rb->Name == 0); 427 rb->Width = width; 428 rb->Height = height; 429 rb->InternalFormat = internalFormat; 430 431 return true; 432} 433 434/** Dummy function for gl_renderbuffer::AllocStorage() */ 435static GLboolean 436brw_nop_alloc_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, 437 GLenum internalFormat, GLuint width, GLuint height) 438{ 439 (void) rb; 440 (void) internalFormat; 441 (void) width; 442 (void) height; 443 _mesa_problem(ctx, "brw_nop_alloc_storage should never be called."); 444 return false; 445} 446 447/** 448 * Create an brw_renderbuffer for a __DRIdrawable. This function is 449 * unrelated to GL renderbuffers (that is, those created by 450 * glGenRenderbuffers). 451 * 452 * \param num_samples must be quantized. 453 */ 454struct brw_renderbuffer * 455brw_create_winsys_renderbuffer(struct brw_screen *screen, 456 mesa_format format, unsigned num_samples) 457{ 458 struct brw_renderbuffer *irb = CALLOC_STRUCT(brw_renderbuffer); 459 if (!irb) 460 return NULL; 461 462 struct gl_renderbuffer *rb = &irb->Base.Base; 463 irb->layer_count = 1; 464 465 _mesa_init_renderbuffer(rb, 0); 466 rb->ClassID = INTEL_RB_CLASS; 467 rb->NumSamples = num_samples; 468 rb->NumStorageSamples = num_samples; 469 470 rb->Format = fallback_rgbx_to_rgba(screen, rb, format); 471 472 /* intel-specific methods */ 473 rb->Delete = brw_delete_renderbuffer; 474 rb->AllocStorage = brw_alloc_window_storage; 475 476 return irb; 477} 478 479/** 480 * Private window-system buffers (as opposed to ones shared with the display 481 * server created with brw_create_winsys_renderbuffer()) are most similar in their 482 * handling to user-created renderbuffers, but they have a resize handler that 483 * may be called at brw_update_renderbuffers() time. 484 * 485 * \param num_samples must be quantized. 486 */ 487struct brw_renderbuffer * 488brw_create_private_renderbuffer(struct brw_screen *screen, 489 mesa_format format, unsigned num_samples) 490{ 491 struct brw_renderbuffer *irb; 492 493 irb = brw_create_winsys_renderbuffer(screen, format, num_samples); 494 irb->Base.Base.AllocStorage = brw_alloc_private_renderbuffer_storage; 495 496 return irb; 497} 498 499/** 500 * Create a new renderbuffer object. 501 * Typically called via glBindRenderbufferEXT(). 502 */ 503static struct gl_renderbuffer * 504brw_new_renderbuffer(struct gl_context *ctx, GLuint name) 505{ 506 struct brw_renderbuffer *irb; 507 struct gl_renderbuffer *rb; 508 509 irb = CALLOC_STRUCT(brw_renderbuffer); 510 if (!irb) { 511 _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer"); 512 return NULL; 513 } 514 515 rb = &irb->Base.Base; 516 517 _mesa_init_renderbuffer(rb, name); 518 rb->ClassID = INTEL_RB_CLASS; 519 520 /* intel-specific methods */ 521 rb->Delete = brw_delete_renderbuffer; 522 rb->AllocStorage = brw_alloc_renderbuffer_storage; 523 /* span routines set in alloc_storage function */ 524 525 return rb; 526} 527 528static bool 529brw_renderbuffer_update_wrapper(struct brw_context *brw, 530 struct brw_renderbuffer *irb, 531 struct gl_texture_image *image, 532 uint32_t layer, 533 bool layered) 534{ 535 struct gl_renderbuffer *rb = &irb->Base.Base; 536 struct brw_texture_image *intel_image = brw_texture_image(image); 537 struct brw_mipmap_tree *mt = intel_image->mt; 538 int level = image->Level; 539 540 rb->AllocStorage = brw_nop_alloc_storage; 541 542 /* adjust for texture view parameters */ 543 layer += image->TexObject->Attrib.MinLayer; 544 level += image->TexObject->Attrib.MinLevel; 545 546 brw_miptree_check_level_layer(mt, level, layer); 547 irb->mt_level = level; 548 irb->mt_layer = layer; 549 550 if (!layered) { 551 irb->layer_count = 1; 552 } else if (mt->target != GL_TEXTURE_3D && image->TexObject->Attrib.NumLayers > 0) { 553 irb->layer_count = image->TexObject->Attrib.NumLayers; 554 } else { 555 irb->layer_count = mt->surf.dim == ISL_SURF_DIM_3D ? 556 minify(mt->surf.logical_level0_px.depth, level) : 557 mt->surf.logical_level0_px.array_len; 558 } 559 560 brw_miptree_reference(&irb->mt, mt); 561 562 brw_renderbuffer_set_draw_offset(irb); 563 564 return true; 565} 566 567void 568brw_renderbuffer_set_draw_offset(struct brw_renderbuffer *irb) 569{ 570 unsigned int dst_x, dst_y; 571 572 /* compute offset of the particular 2D image within the texture region */ 573 brw_miptree_get_image_offset(irb->mt, irb->mt_level, irb->mt_layer, 574 &dst_x, &dst_y); 575 576 irb->draw_x = dst_x; 577 irb->draw_y = dst_y; 578} 579 580/** 581 * Called by glFramebufferTexture[123]DEXT() (and other places) to 582 * prepare for rendering into texture memory. This might be called 583 * many times to choose different texture levels, cube faces, etc 584 * before brw_finish_render_texture() is ever called. 585 */ 586static void 587brw_render_texture(struct gl_context * ctx, 588 struct gl_framebuffer *fb, 589 struct gl_renderbuffer_attachment *att) 590{ 591 struct brw_context *brw = brw_context(ctx); 592 struct gl_renderbuffer *rb = att->Renderbuffer; 593 struct brw_renderbuffer *irb = brw_renderbuffer(rb); 594 struct gl_texture_image *image = rb->TexImage; 595 struct brw_texture_image *intel_image = brw_texture_image(image); 596 struct brw_mipmap_tree *mt = intel_image->mt; 597 int layer; 598 599 (void) fb; 600 601 if (att->CubeMapFace > 0) { 602 assert(att->Zoffset == 0); 603 layer = att->CubeMapFace; 604 } else { 605 layer = att->Zoffset; 606 } 607 608 if (!intel_image->mt) { 609 /* Fallback on drawing to a texture that doesn't have a miptree 610 * (has a border, width/height 0, etc.) 611 */ 612 _swrast_render_texture(ctx, fb, att); 613 return; 614 } 615 616 brw_miptree_check_level_layer(mt, att->TextureLevel, layer); 617 618 if (!brw_renderbuffer_update_wrapper(brw, irb, image, layer, att->Layered)) { 619 _swrast_render_texture(ctx, fb, att); 620 return; 621 } 622 623 DBG("Begin render %s texture tex=%u w=%d h=%d d=%d refcount=%d\n", 624 _mesa_get_format_name(image->TexFormat), 625 att->Texture->Name, image->Width, image->Height, image->Depth, 626 rb->RefCount); 627} 628 629 630#define fbo_incomplete(fb, error_id, ...) do { \ 631 static GLuint msg_id = 0; \ 632 if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \ 633 _mesa_gl_debugf(ctx, &msg_id, \ 634 MESA_DEBUG_SOURCE_API, \ 635 MESA_DEBUG_TYPE_OTHER, \ 636 MESA_DEBUG_SEVERITY_MEDIUM, \ 637 __VA_ARGS__); \ 638 } \ 639 DBG(__VA_ARGS__); \ 640 fb->_Status = error_id; \ 641 } while (0) 642 643/** 644 * Do additional "completeness" testing of a framebuffer object. 645 */ 646static void 647brw_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) 648{ 649 struct brw_context *brw = brw_context(ctx); 650 const struct intel_device_info *devinfo = &brw->screen->devinfo; 651 struct brw_renderbuffer *depthRb = 652 brw_get_renderbuffer(fb, BUFFER_DEPTH); 653 struct brw_renderbuffer *stencilRb = 654 brw_get_renderbuffer(fb, BUFFER_STENCIL); 655 struct brw_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL; 656 unsigned i; 657 658 DBG("%s() on fb %p (%s)\n", __func__, 659 fb, (fb == ctx->DrawBuffer ? "drawbuffer" : 660 (fb == ctx->ReadBuffer ? "readbuffer" : "other buffer"))); 661 662 if (depthRb) 663 depth_mt = depthRb->mt; 664 if (stencilRb) { 665 stencil_mt = stencilRb->mt; 666 if (stencil_mt->stencil_mt) 667 stencil_mt = stencil_mt->stencil_mt; 668 } 669 670 if (depth_mt && stencil_mt) { 671 if (devinfo->ver >= 6) { 672 const unsigned d_width = depth_mt->surf.phys_level0_sa.width; 673 const unsigned d_height = depth_mt->surf.phys_level0_sa.height; 674 const unsigned d_depth = depth_mt->surf.dim == ISL_SURF_DIM_3D ? 675 depth_mt->surf.phys_level0_sa.depth : 676 depth_mt->surf.phys_level0_sa.array_len; 677 678 const unsigned s_width = stencil_mt->surf.phys_level0_sa.width; 679 const unsigned s_height = stencil_mt->surf.phys_level0_sa.height; 680 const unsigned s_depth = stencil_mt->surf.dim == ISL_SURF_DIM_3D ? 681 stencil_mt->surf.phys_level0_sa.depth : 682 stencil_mt->surf.phys_level0_sa.array_len; 683 684 /* For gen >= 6, we are using the lod/minimum-array-element fields 685 * and supporting layered rendering. This means that we must restrict 686 * the depth & stencil attachments to match in various more retrictive 687 * ways. (width, height, depth, LOD and layer) 688 */ 689 if (d_width != s_width || 690 d_height != s_height || 691 d_depth != s_depth || 692 depthRb->mt_level != stencilRb->mt_level || 693 depthRb->mt_layer != stencilRb->mt_layer) { 694 fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, 695 "FBO incomplete: depth and stencil must match in" 696 "width, height, depth, LOD and layer\n"); 697 } 698 } 699 if (depth_mt == stencil_mt) { 700 /* For true packed depth/stencil (not faked on prefers-separate-stencil 701 * hardware) we need to be sure they're the same level/layer, since 702 * we'll be emitting a single packet describing the packed setup. 703 */ 704 if (depthRb->mt_level != stencilRb->mt_level || 705 depthRb->mt_layer != stencilRb->mt_layer) { 706 fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, 707 "FBO incomplete: depth image level/layer %d/%d != " 708 "stencil image %d/%d\n", 709 depthRb->mt_level, 710 depthRb->mt_layer, 711 stencilRb->mt_level, 712 stencilRb->mt_layer); 713 } 714 } else { 715 if (!brw->has_separate_stencil) { 716 fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, 717 "FBO incomplete: separate stencil unsupported\n"); 718 } 719 if (stencil_mt->format != MESA_FORMAT_S_UINT8) { 720 fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, 721 "FBO incomplete: separate stencil is %s " 722 "instead of S8\n", 723 _mesa_get_format_name(stencil_mt->format)); 724 } 725 if (devinfo->ver < 7 && !brw_renderbuffer_has_hiz(depthRb)) { 726 /* Before Gfx7, separate depth and stencil buffers can be used 727 * only if HiZ is enabled. From the Sandybridge PRM, Volume 2, 728 * Part 1, Bit 3DSTATE_DEPTH_BUFFER.SeparateStencilBufferEnable: 729 * [DevSNB]: This field must be set to the same value (enabled 730 * or disabled) as Hierarchical Depth Buffer Enable. 731 */ 732 fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, 733 "FBO incomplete: separate stencil without HiZ\n"); 734 } 735 } 736 } 737 738 for (i = 0; i < ARRAY_SIZE(fb->Attachment); i++) { 739 struct gl_renderbuffer *rb; 740 struct brw_renderbuffer *irb; 741 742 if (fb->Attachment[i].Type == GL_NONE) 743 continue; 744 745 /* A supported attachment will have a Renderbuffer set either 746 * from being a Renderbuffer or being a texture that got the 747 * brw_wrap_texture() treatment. 748 */ 749 rb = fb->Attachment[i].Renderbuffer; 750 if (rb == NULL) { 751 fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, 752 "FBO incomplete: attachment without " 753 "renderbuffer\n"); 754 continue; 755 } 756 757 if (fb->Attachment[i].Type == GL_TEXTURE) { 758 if (rb->TexImage->Border) { 759 fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, 760 "FBO incomplete: texture with border\n"); 761 continue; 762 } 763 } 764 765 irb = brw_renderbuffer(rb); 766 if (irb == NULL) { 767 fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, 768 "FBO incomplete: software rendering renderbuffer\n"); 769 continue; 770 } 771 772 if (rb->Format == MESA_FORMAT_R_SRGB8) { 773 fbo_incomplete(fb, GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT, 774 "FBO incomplete: Format not color renderable: %s\n", 775 _mesa_get_format_name(rb->Format)); 776 continue; 777 } 778 779 if (!brw_render_target_supported(brw, rb)) { 780 fbo_incomplete(fb, GL_FRAMEBUFFER_UNSUPPORTED, 781 "FBO incomplete: Unsupported HW " 782 "texture/renderbuffer format attached: %s\n", 783 _mesa_get_format_name(brw_rb_format(irb))); 784 } 785 } 786} 787 788/** 789 * Try to do a glBlitFramebuffer using glCopyTexSubImage2D 790 * We can do this when the dst renderbuffer is actually a texture and 791 * there is no scaling, mirroring or scissoring. 792 * 793 * \return new buffer mask indicating the buffers left to blit using the 794 * normal path. 795 */ 796static GLbitfield 797brw_blit_framebuffer_with_blitter(struct gl_context *ctx, 798 const struct gl_framebuffer *readFb, 799 const struct gl_framebuffer *drawFb, 800 GLint srcX0, GLint srcY0, 801 GLint srcX1, GLint srcY1, 802 GLint dstX0, GLint dstY0, 803 GLint dstX1, GLint dstY1, 804 GLbitfield mask) 805{ 806 struct brw_context *brw = brw_context(ctx); 807 808 /* Sync up the state of window system buffers. We need to do this before 809 * we go looking for the buffers. 810 */ 811 brw_prepare_render(brw); 812 813 if (mask & GL_COLOR_BUFFER_BIT) { 814 unsigned i; 815 struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer; 816 struct brw_renderbuffer *src_irb = brw_renderbuffer(src_rb); 817 818 if (!src_irb) { 819 perf_debug("glBlitFramebuffer(): missing src renderbuffer. " 820 "Falling back to software rendering.\n"); 821 return mask; 822 } 823 824 /* If the source and destination are the same size with no mirroring, 825 * the rectangles are within the size of the texture and there is no 826 * scissor, then we can probably use the blit engine. 827 */ 828 if (!(srcX0 - srcX1 == dstX0 - dstX1 && 829 srcY0 - srcY1 == dstY0 - dstY1 && 830 srcX1 >= srcX0 && 831 srcY1 >= srcY0 && 832 srcX0 >= 0 && srcX1 <= readFb->Width && 833 srcY0 >= 0 && srcY1 <= readFb->Height && 834 dstX0 >= 0 && dstX1 <= drawFb->Width && 835 dstY0 >= 0 && dstY1 <= drawFb->Height && 836 !(ctx->Scissor.EnableFlags))) { 837 perf_debug("glBlitFramebuffer(): non-1:1 blit. " 838 "Falling back to software rendering.\n"); 839 return mask; 840 } 841 842 /* Blit to all active draw buffers. We don't do any pre-checking, 843 * because we assume that copying to MRTs is rare, and failure midway 844 * through copying is even more rare. Even if it was to occur, it's 845 * safe to let meta start the copy over from scratch, because 846 * glBlitFramebuffer completely overwrites the destination pixels, and 847 * results are undefined if any destination pixels have a dependency on 848 * source pixels. 849 */ 850 for (i = 0; i < drawFb->_NumColorDrawBuffers; i++) { 851 struct gl_renderbuffer *dst_rb = drawFb->_ColorDrawBuffers[i]; 852 struct brw_renderbuffer *dst_irb = brw_renderbuffer(dst_rb); 853 854 if (!dst_irb) { 855 perf_debug("glBlitFramebuffer(): missing dst renderbuffer. " 856 "Falling back to software rendering.\n"); 857 return mask; 858 } 859 860 if (ctx->Color.sRGBEnabled && 861 _mesa_is_format_srgb(src_irb->mt->format) != 862 _mesa_is_format_srgb(dst_irb->mt->format)) { 863 perf_debug("glBlitFramebuffer() with sRGB conversion cannot be " 864 "handled by BLT path.\n"); 865 return mask; 866 } 867 868 if (!brw_miptree_blit(brw, 869 src_irb->mt, 870 src_irb->mt_level, src_irb->mt_layer, 871 srcX0, srcY0, readFb->FlipY, 872 dst_irb->mt, 873 dst_irb->mt_level, dst_irb->mt_layer, 874 dstX0, dstY0, drawFb->FlipY, 875 dstX1 - dstX0, dstY1 - dstY0, 876 COLOR_LOGICOP_COPY)) { 877 perf_debug("glBlitFramebuffer(): unknown blit failure. " 878 "Falling back to software rendering.\n"); 879 return mask; 880 } 881 } 882 883 mask &= ~GL_COLOR_BUFFER_BIT; 884 } 885 886 return mask; 887} 888 889static void 890brw_blit_framebuffer(struct gl_context *ctx, 891 struct gl_framebuffer *readFb, 892 struct gl_framebuffer *drawFb, 893 GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, 894 GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, 895 GLbitfield mask, GLenum filter) 896{ 897 struct brw_context *brw = brw_context(ctx); 898 const struct intel_device_info *devinfo = &brw->screen->devinfo; 899 900 /* Page 679 of OpenGL 4.4 spec says: 901 * "Added BlitFramebuffer to commands affected by conditional rendering in 902 * section 10.10 (Bug 9562)." 903 */ 904 if (!_mesa_check_conditional_render(ctx)) 905 return; 906 907 if (devinfo->ver < 6) { 908 /* On gfx4-5, try BLT first. 909 * 910 * Gfx4-5 have a single ring for both 3D and BLT operations, so there's 911 * no inter-ring synchronization issues like on Gfx6+. It is apparently 912 * faster than using the 3D pipeline. Original Gfx4 also has to rebase 913 * and copy miptree slices in order to render to unaligned locations. 914 */ 915 mask = brw_blit_framebuffer_with_blitter(ctx, readFb, drawFb, 916 srcX0, srcY0, srcX1, srcY1, 917 dstX0, dstY0, dstX1, dstY1, 918 mask); 919 if (mask == 0x0) 920 return; 921 } 922 923 mask = brw_blorp_framebuffer(brw, readFb, drawFb, 924 srcX0, srcY0, srcX1, srcY1, 925 dstX0, dstY0, dstX1, dstY1, 926 mask, filter); 927 if (mask == 0x0) 928 return; 929 930 /* brw_blorp_framebuffer should always be successful for color blits. */ 931 assert(!(mask & GL_COLOR_BUFFER_BIT)); 932 933 mask = _mesa_meta_BlitFramebuffer(ctx, readFb, drawFb, 934 srcX0, srcY0, srcX1, srcY1, 935 dstX0, dstY0, dstX1, dstY1, 936 mask, filter); 937 if (mask == 0x0) 938 return; 939 940 if (devinfo->ver >= 8 && (mask & GL_STENCIL_BUFFER_BIT)) { 941 assert(!"Invalid blit"); 942 } 943 944 _swrast_BlitFramebuffer(ctx, readFb, drawFb, 945 srcX0, srcY0, srcX1, srcY1, 946 dstX0, dstY0, dstX1, dstY1, 947 mask, filter); 948} 949 950/** 951 * Does the renderbuffer have hiz enabled? 952 */ 953bool 954brw_renderbuffer_has_hiz(struct brw_renderbuffer *irb) 955{ 956 return brw_miptree_level_has_hiz(irb->mt, irb->mt_level); 957} 958 959void 960brw_renderbuffer_move_to_temp(struct brw_context *brw, 961 struct brw_renderbuffer *irb, 962 bool invalidate) 963{ 964 struct gl_renderbuffer *rb =&irb->Base.Base; 965 struct brw_texture_image *intel_image = brw_texture_image(rb->TexImage); 966 struct brw_mipmap_tree *new_mt; 967 int width, height, depth; 968 969 brw_get_image_dims(rb->TexImage, &width, &height, &depth); 970 971 assert(irb->align_wa_mt == NULL); 972 new_mt = brw_miptree_create(brw, GL_TEXTURE_2D, 973 intel_image->base.Base.TexFormat, 974 0, 0, 975 width, height, 1, 976 irb->mt->surf.samples, 977 MIPTREE_CREATE_BUSY); 978 979 if (!invalidate) { 980 brw_miptree_copy_slice(brw, intel_image->mt, 981 intel_image->base.Base.Level, irb->mt_layer, 982 new_mt, 0, 0); 983 } 984 985 brw_miptree_reference(&irb->align_wa_mt, new_mt); 986 brw_miptree_release(&new_mt); 987 988 irb->draw_x = 0; 989 irb->draw_y = 0; 990} 991 992void 993brw_cache_sets_clear(struct brw_context *brw) 994{ 995 hash_table_foreach(brw->render_cache, render_entry) 996 _mesa_hash_table_remove(brw->render_cache, render_entry); 997 998 set_foreach(brw->depth_cache, depth_entry) 999 _mesa_set_remove(brw->depth_cache, depth_entry); 1000} 1001 1002/** 1003 * Emits an appropriate flush for a BO if it has been rendered to within the 1004 * same batchbuffer as a read that's about to be emitted. 1005 * 1006 * The GPU has separate, incoherent caches for the render cache and the 1007 * sampler cache, along with other caches. Usually data in the different 1008 * caches don't interact (e.g. we don't render to our driver-generated 1009 * immediate constant data), but for render-to-texture in FBOs we definitely 1010 * do. When a batchbuffer is flushed, the kernel will ensure that everything 1011 * necessary is flushed before another use of that BO, but for reuse from 1012 * different caches within a batchbuffer, it's all our responsibility. 1013 */ 1014static void 1015flush_depth_and_render_caches(struct brw_context *brw, struct brw_bo *bo) 1016{ 1017 const struct intel_device_info *devinfo = &brw->screen->devinfo; 1018 1019 if (devinfo->ver >= 6) { 1020 brw_emit_pipe_control_flush(brw, 1021 PIPE_CONTROL_DEPTH_CACHE_FLUSH | 1022 PIPE_CONTROL_RENDER_TARGET_FLUSH | 1023 PIPE_CONTROL_CS_STALL); 1024 1025 brw_emit_pipe_control_flush(brw, 1026 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 1027 PIPE_CONTROL_CONST_CACHE_INVALIDATE); 1028 } else { 1029 brw_emit_mi_flush(brw); 1030 } 1031 1032 brw_cache_sets_clear(brw); 1033} 1034 1035void 1036brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo) 1037{ 1038 if (_mesa_hash_table_search(brw->render_cache, bo) || 1039 _mesa_set_search(brw->depth_cache, bo)) 1040 flush_depth_and_render_caches(brw, bo); 1041} 1042 1043static void * 1044format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage) 1045{ 1046 return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage); 1047} 1048 1049void 1050brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo, 1051 enum isl_format format, 1052 enum isl_aux_usage aux_usage) 1053{ 1054 if (_mesa_set_search(brw->depth_cache, bo)) 1055 flush_depth_and_render_caches(brw, bo); 1056 1057 /* Check to see if this bo has been used by a previous rendering operation 1058 * but with a different format or aux usage. If it has, flush the render 1059 * cache so we ensure that it's only in there with one format or aux usage 1060 * at a time. 1061 * 1062 * Even though it's not obvious, this can easily happen in practice. 1063 * Suppose a client is blending on a surface with sRGB encode enabled on 1064 * gfx9. This implies that you get AUX_USAGE_CCS_D at best. If the client 1065 * then disables sRGB decode and continues blending we will flip on 1066 * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is 1067 * perfectly valid since CCS_E is a subset of CCS_D). However, this means 1068 * that we have fragments in-flight which are rendering with UNORM+CCS_E 1069 * and other fragments in-flight with SRGB+CCS_D on the same surface at the 1070 * same time and the pixel scoreboard and color blender are trying to sort 1071 * it all out. This ends badly (i.e. GPU hangs). 1072 * 1073 * To date, we have never observed GPU hangs or even corruption to be 1074 * associated with switching the format, only the aux usage. However, 1075 * there are comments in various docs which indicate that the render cache 1076 * isn't 100% resilient to format changes. We may as well be conservative 1077 * and flush on format changes too. We can always relax this later if we 1078 * find it to be a performance problem. 1079 */ 1080 struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo); 1081 if (entry && entry->data != format_aux_tuple(format, aux_usage)) 1082 flush_depth_and_render_caches(brw, bo); 1083} 1084 1085void 1086brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo, 1087 enum isl_format format, 1088 enum isl_aux_usage aux_usage) 1089{ 1090#ifndef NDEBUG 1091 struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo); 1092 if (entry) { 1093 /* Otherwise, someone didn't do a flush_for_render and that would be 1094 * very bad indeed. 1095 */ 1096 assert(entry->data == format_aux_tuple(format, aux_usage)); 1097 } 1098#endif 1099 1100 _mesa_hash_table_insert(brw->render_cache, bo, 1101 format_aux_tuple(format, aux_usage)); 1102} 1103 1104void 1105brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo) 1106{ 1107 if (_mesa_hash_table_search(brw->render_cache, bo)) 1108 flush_depth_and_render_caches(brw, bo); 1109} 1110 1111void 1112brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo) 1113{ 1114 _mesa_set_add(brw->depth_cache, bo); 1115} 1116 1117/** 1118 * Do one-time context initializations related to GL_EXT_framebuffer_object. 1119 * Hook in device driver functions. 1120 */ 1121void 1122brw_fbo_init(struct brw_context *brw) 1123{ 1124 struct dd_function_table *dd = &brw->ctx.Driver; 1125 dd->NewRenderbuffer = brw_new_renderbuffer; 1126 dd->MapRenderbuffer = brw_map_renderbuffer; 1127 dd->UnmapRenderbuffer = brw_unmap_renderbuffer; 1128 dd->RenderTexture = brw_render_texture; 1129 dd->ValidateFramebuffer = brw_validate_framebuffer; 1130 dd->BlitFramebuffer = brw_blit_framebuffer; 1131 dd->EGLImageTargetRenderbufferStorage = 1132 brw_image_target_renderbuffer_storage; 1133 1134 brw->render_cache = _mesa_hash_table_create(brw->mem_ctx, _mesa_hash_pointer, 1135 _mesa_key_pointer_equal); 1136 brw->depth_cache = _mesa_set_create(brw->mem_ctx, _mesa_hash_pointer, 1137 _mesa_key_pointer_equal); 1138 util_dynarray_init(&brw->batch.exec_fences, NULL); 1139} 1140