1/************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "main/bufferobj.h" 29#include "main/image.h" 30#include "main/pbo.h" 31 32#include "main/readpix.h" 33#include "main/enums.h" 34#include "main/framebuffer.h" 35#include "util/u_inlines.h" 36#include "util/format/u_format.h" 37#include "cso_cache/cso_context.h" 38 39#include "st_cb_fbo.h" 40#include "st_atom.h" 41#include "st_context.h" 42#include "st_cb_bitmap.h" 43#include "st_cb_readpixels.h" 44#include "st_debug.h" 45#include "state_tracker/st_cb_texture.h" 46#include "state_tracker/st_format.h" 47#include "state_tracker/st_pbo.h" 48#include "state_tracker/st_texture.h" 49#include "state_tracker/st_util.h" 50 51 52/* The readpixels cache caches a blitted staging texture so that back-to-back 53 * calls to glReadPixels with user pointers require less CPU-GPU synchronization. 54 * 55 * Assumptions: 56 * 57 * (1) Blits have high synchronization overheads, and it is beneficial to 58 * use a single blit of the entire framebuffer instead of many smaller 59 * blits (because the smaller blits cannot be batched, and we have to wait 60 * for the GPU after each one). 61 * 62 * (2) texture_map implicitly involves a blit as well (for de-tiling, copy 63 * from VRAM, etc.), so that it is beneficial to replace the 64 * _mesa_readpixels path as well when possible. 65 * 66 * Change this #define to true to fill and use the cache whenever possible 67 * (this is inefficient and only meant for testing / debugging). 68 */ 69#define ALWAYS_READPIXELS_CACHE false 70 71static boolean 72needs_integer_signed_unsigned_conversion(const struct gl_context *ctx, 73 GLenum format, GLenum type) 74{ 75 struct gl_renderbuffer *rb = 76 _mesa_get_read_renderbuffer_for_format(ctx, format); 77 78 assert(rb); 79 80 GLenum srcType = _mesa_get_format_datatype(rb->Format); 81 82 if ((srcType == GL_INT && 83 (type == GL_UNSIGNED_INT || 84 type == GL_UNSIGNED_SHORT || 85 type == GL_UNSIGNED_BYTE)) || 86 (srcType == GL_UNSIGNED_INT && 87 (type == GL_INT || 88 type == GL_SHORT || 89 type == GL_BYTE))) { 90 return TRUE; 91 } 92 93 return FALSE; 94} 95 96static bool 97try_pbo_readpixels(struct st_context *st, struct st_renderbuffer *strb, 98 bool invert_y, 99 GLint x, GLint y, GLsizei width, GLsizei height, 100 GLenum gl_format, 101 enum pipe_format src_format, enum pipe_format dst_format, 102 const struct gl_pixelstore_attrib *pack, void *pixels) 103{ 104 struct pipe_context *pipe = st->pipe; 105 struct pipe_screen *screen = st->screen; 106 struct cso_context *cso = st->cso_context; 107 struct pipe_surface *surface = strb->surface; 108 struct pipe_resource *texture = strb->texture; 109 const struct util_format_description *desc; 110 struct st_pbo_addresses addr; 111 struct pipe_framebuffer_state fb; 112 enum pipe_texture_target view_target; 113 bool success = false; 114 115 /* Make sure we have stencil format in case of GL_STENCIL_INDEX to 116 * create correct type of a sampler view. 117 */ 118 if (gl_format == GL_STENCIL_INDEX) 119 src_format = util_format_stencil_only(src_format); 120 121 if (texture->nr_samples > 1) 122 return false; 123 124 if (!screen->is_format_supported(screen, dst_format, PIPE_BUFFER, 0, 0, 125 PIPE_BIND_SHADER_IMAGE)) 126 return false; 127 128 desc = util_format_description(dst_format); 129 130 /* Compute PBO addresses */ 131 addr.bytes_per_pixel = desc->block.bits / 8; 132 addr.xoffset = x; 133 addr.yoffset = y; 134 addr.width = width; 135 addr.height = height; 136 addr.depth = 1; 137 if (!st_pbo_addresses_pixelstore(st, GL_TEXTURE_2D, false, pack, pixels, &addr)) 138 return false; 139 140 cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLERS | 141 CSO_BIT_BLEND | 142 CSO_BIT_VERTEX_ELEMENTS | 143 CSO_BIT_FRAMEBUFFER | 144 CSO_BIT_VIEWPORT | 145 CSO_BIT_RASTERIZER | 146 CSO_BIT_DEPTH_STENCIL_ALPHA | 147 CSO_BIT_STREAM_OUTPUTS | 148 (st->active_queries ? CSO_BIT_PAUSE_QUERIES : 0) | 149 CSO_BIT_SAMPLE_MASK | 150 CSO_BIT_MIN_SAMPLES | 151 CSO_BIT_RENDER_CONDITION | 152 CSO_BITS_ALL_SHADERS)); 153 154 cso_set_sample_mask(cso, ~0); 155 cso_set_min_samples(cso, 1); 156 cso_set_render_condition(cso, NULL, FALSE, 0); 157 158 /* Set up the sampler_view */ 159 { 160 struct pipe_sampler_view templ; 161 struct pipe_sampler_view *sampler_view; 162 struct pipe_sampler_state sampler = {0}; 163 const struct pipe_sampler_state *samplers[1] = {&sampler}; 164 165 u_sampler_view_default_template(&templ, texture, src_format); 166 167 switch (texture->target) { 168 case PIPE_TEXTURE_CUBE: 169 case PIPE_TEXTURE_CUBE_ARRAY: 170 view_target = PIPE_TEXTURE_2D_ARRAY; 171 break; 172 default: 173 view_target = texture->target; 174 break; 175 } 176 177 templ.target = view_target; 178 templ.u.tex.first_level = surface->u.tex.level; 179 templ.u.tex.last_level = templ.u.tex.first_level; 180 181 if (view_target != PIPE_TEXTURE_3D) { 182 templ.u.tex.first_layer = surface->u.tex.first_layer; 183 templ.u.tex.last_layer = templ.u.tex.first_layer; 184 } else { 185 addr.constants.layer_offset = surface->u.tex.first_layer; 186 } 187 188 sampler_view = pipe->create_sampler_view(pipe, texture, &templ); 189 if (sampler_view == NULL) 190 goto fail; 191 192 pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, 193 false, &sampler_view); 194 st->state.num_sampler_views[PIPE_SHADER_FRAGMENT] = 195 MAX2(st->state.num_sampler_views[PIPE_SHADER_FRAGMENT], 1); 196 197 pipe_sampler_view_reference(&sampler_view, NULL); 198 199 cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers); 200 } 201 202 /* Set up destination image */ 203 { 204 struct pipe_image_view image; 205 206 memset(&image, 0, sizeof(image)); 207 image.resource = addr.buffer; 208 image.format = dst_format; 209 image.access = PIPE_IMAGE_ACCESS_WRITE; 210 image.shader_access = PIPE_IMAGE_ACCESS_WRITE; 211 image.u.buf.offset = addr.first_element * addr.bytes_per_pixel; 212 image.u.buf.size = (addr.last_element - addr.first_element + 1) * 213 addr.bytes_per_pixel; 214 215 pipe->set_shader_images(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, &image); 216 } 217 218 /* Set up no-attachment framebuffer */ 219 memset(&fb, 0, sizeof(fb)); 220 fb.width = surface->width; 221 fb.height = surface->height; 222 fb.samples = 1; 223 fb.layers = 1; 224 cso_set_framebuffer(cso, &fb); 225 226 /* Any blend state would do. Set this just to prevent drivers having 227 * blend == NULL. 228 */ 229 cso_set_blend(cso, &st->pbo.upload_blend); 230 231 cso_set_viewport_dims(cso, fb.width, fb.height, invert_y); 232 233 if (invert_y) 234 st_pbo_addresses_invert_y(&addr, fb.height); 235 236 { 237 struct pipe_depth_stencil_alpha_state dsa; 238 memset(&dsa, 0, sizeof(dsa)); 239 cso_set_depth_stencil_alpha(cso, &dsa); 240 } 241 242 /* Set up the fragment shader */ 243 { 244 void *fs = st_pbo_get_download_fs(st, view_target, src_format, dst_format, addr.depth != 1); 245 if (!fs) 246 goto fail; 247 248 cso_set_fragment_shader_handle(cso, fs); 249 } 250 251 success = st_pbo_draw(st, &addr, fb.width, fb.height); 252 253 /* Buffer written via shader images needs explicit synchronization. */ 254 pipe->memory_barrier(pipe, PIPE_BARRIER_ALL); 255 256fail: 257 /* Unbind all because st/mesa won't do it if the current shader doesn't 258 * use them. 259 */ 260 cso_restore_state(cso, CSO_UNBIND_FS_SAMPLERVIEWS | CSO_UNBIND_FS_IMAGE0); 261 st->state.num_sampler_views[PIPE_SHADER_FRAGMENT] = 0; 262 263 st->dirty |= ST_NEW_FS_CONSTANTS | 264 ST_NEW_FS_IMAGES | 265 ST_NEW_FS_SAMPLER_VIEWS | 266 ST_NEW_VERTEX_ARRAYS; 267 268 return success; 269} 270 271/** 272 * Create a staging texture and blit the requested region to it. 273 */ 274static struct pipe_resource * 275blit_to_staging(struct st_context *st, struct st_renderbuffer *strb, 276 bool invert_y, 277 GLint x, GLint y, GLsizei width, GLsizei height, 278 GLenum format, 279 enum pipe_format src_format, enum pipe_format dst_format) 280{ 281 struct pipe_screen *screen = st->screen; 282 struct pipe_resource dst_templ; 283 struct pipe_resource *dst; 284 struct pipe_blit_info blit; 285 286 /* We are creating a texture of the size of the region being read back. 287 * Need to check for NPOT texture support. */ 288 if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) && 289 (!util_is_power_of_two_or_zero(width) || 290 !util_is_power_of_two_or_zero(height))) 291 return NULL; 292 293 /* create the destination texture */ 294 memset(&dst_templ, 0, sizeof(dst_templ)); 295 dst_templ.target = PIPE_TEXTURE_2D; 296 dst_templ.format = dst_format; 297 if (util_format_is_depth_or_stencil(dst_format)) 298 dst_templ.bind |= PIPE_BIND_DEPTH_STENCIL; 299 else 300 dst_templ.bind |= PIPE_BIND_RENDER_TARGET; 301 dst_templ.usage = PIPE_USAGE_STAGING; 302 303 st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1, 304 &dst_templ.width0, &dst_templ.height0, 305 &dst_templ.depth0, &dst_templ.array_size); 306 307 dst = screen->resource_create(screen, &dst_templ); 308 if (!dst) 309 return NULL; 310 311 memset(&blit, 0, sizeof(blit)); 312 blit.src.resource = strb->texture; 313 blit.src.level = strb->surface->u.tex.level; 314 blit.src.format = src_format; 315 blit.dst.resource = dst; 316 blit.dst.level = 0; 317 blit.dst.format = dst->format; 318 blit.src.box.x = x; 319 blit.dst.box.x = 0; 320 blit.src.box.y = y; 321 blit.dst.box.y = 0; 322 blit.src.box.z = strb->surface->u.tex.first_layer; 323 blit.dst.box.z = 0; 324 blit.src.box.width = blit.dst.box.width = width; 325 blit.src.box.height = blit.dst.box.height = height; 326 blit.src.box.depth = blit.dst.box.depth = 1; 327 blit.mask = st_get_blit_mask(strb->Base._BaseFormat, format); 328 blit.filter = PIPE_TEX_FILTER_NEAREST; 329 blit.scissor_enable = FALSE; 330 331 if (invert_y) { 332 blit.src.box.y = strb->Base.Height - blit.src.box.y; 333 blit.src.box.height = -blit.src.box.height; 334 } 335 336 /* blit */ 337 st->pipe->blit(st->pipe, &blit); 338 339 return dst; 340} 341 342static struct pipe_resource * 343try_cached_readpixels(struct st_context *st, struct st_renderbuffer *strb, 344 bool invert_y, 345 GLsizei width, GLsizei height, 346 GLenum format, 347 enum pipe_format src_format, enum pipe_format dst_format) 348{ 349 struct pipe_resource *src = strb->texture; 350 struct pipe_resource *dst = NULL; 351 352 if (ST_DEBUG & DEBUG_NOREADPIXCACHE) 353 return NULL; 354 355 /* Reset cache after invalidation or switch of parameters. */ 356 if (st->readpix_cache.src != src || 357 st->readpix_cache.dst_format != dst_format || 358 st->readpix_cache.level != strb->surface->u.tex.level || 359 st->readpix_cache.layer != strb->surface->u.tex.first_layer) { 360 pipe_resource_reference(&st->readpix_cache.src, src); 361 pipe_resource_reference(&st->readpix_cache.cache, NULL); 362 st->readpix_cache.dst_format = dst_format; 363 st->readpix_cache.level = strb->surface->u.tex.level; 364 st->readpix_cache.layer = strb->surface->u.tex.first_layer; 365 st->readpix_cache.hits = 0; 366 } 367 368 /* Decide whether to trigger the cache. */ 369 if (!st->readpix_cache.cache) { 370 if (!strb->use_readpix_cache && !ALWAYS_READPIXELS_CACHE) { 371 /* Heuristic: If previous successive calls read at least a fraction 372 * of the surface _and_ we read again, trigger the cache. 373 */ 374 unsigned threshold = MAX2(1, strb->Base.Width * strb->Base.Height / 8); 375 376 if (st->readpix_cache.hits < threshold) { 377 st->readpix_cache.hits += width * height; 378 return NULL; 379 } 380 381 strb->use_readpix_cache = true; 382 } 383 384 /* Fill the cache */ 385 st->readpix_cache.cache = blit_to_staging(st, strb, invert_y, 386 0, 0, 387 strb->Base.Width, 388 strb->Base.Height, format, 389 src_format, dst_format); 390 } 391 392 /* Return an owning reference to stay consistent with the non-cached path */ 393 pipe_resource_reference(&dst, st->readpix_cache.cache); 394 395 return dst; 396} 397 398/** 399 * This uses a blit to copy the read buffer to a texture format which matches 400 * the format and type combo and then a fast read-back is done using memcpy. 401 * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is 402 * a format which matches the swizzling. 403 * 404 * If such a format isn't available, we fall back to _mesa_readpixels. 405 * 406 * NOTE: Some drivers use a blit to convert between tiled and linear 407 * texture layouts during texture uploads/downloads, so the blit 408 * we do here should be free in such cases. 409 */ 410static void 411st_ReadPixels(struct gl_context *ctx, GLint x, GLint y, 412 GLsizei width, GLsizei height, 413 GLenum format, GLenum type, 414 const struct gl_pixelstore_attrib *pack, 415 void *pixels) 416{ 417 struct st_context *st = st_context(ctx); 418 struct gl_renderbuffer *rb = 419 _mesa_get_read_renderbuffer_for_format(ctx, format); 420 struct st_renderbuffer *strb = st_renderbuffer(rb); 421 struct pipe_context *pipe = st->pipe; 422 struct pipe_screen *screen = st->screen; 423 struct pipe_resource *src; 424 struct pipe_resource *dst = NULL; 425 enum pipe_format dst_format, src_format; 426 unsigned bind; 427 struct pipe_transfer *tex_xfer; 428 ubyte *map = NULL; 429 int dst_x, dst_y; 430 431 /* Validate state (to be sure we have up-to-date framebuffer surfaces) 432 * and flush the bitmap cache prior to reading. */ 433 st_validate_state(st, ST_PIPELINE_UPDATE_FRAMEBUFFER); 434 st_flush_bitmap_cache(st); 435 436 if (!st->prefer_blit_based_texture_transfer) { 437 goto fallback; 438 } 439 440 /* This must be done after state validation. */ 441 src = strb->texture; 442 443 /* XXX Fallback for depth-stencil formats due to an incomplete 444 * stencil blit implementation in some drivers. */ 445 if (format == GL_DEPTH_STENCIL) { 446 goto fallback; 447 } 448 449 /* If the base internal format and the texture format don't match, we have 450 * to use the slow path. */ 451 if (rb->_BaseFormat != 452 _mesa_get_format_base_format(rb->Format)) { 453 goto fallback; 454 } 455 456 if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) { 457 goto fallback; 458 } 459 460 /* Convert the source format to what is expected by ReadPixels 461 * and see if it's supported. */ 462 src_format = util_format_linear(strb->Base.Format); 463 src_format = util_format_luminance_to_red(src_format); 464 src_format = util_format_intensity_to_red(src_format); 465 466 if (!src_format || 467 !screen->is_format_supported(screen, src_format, src->target, 468 src->nr_samples, src->nr_storage_samples, 469 PIPE_BIND_SAMPLER_VIEW)) { 470 goto fallback; 471 } 472 473 if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL) 474 bind = PIPE_BIND_DEPTH_STENCIL; 475 else 476 bind = PIPE_BIND_RENDER_TARGET; 477 478 /* Choose the destination format by finding the best match 479 * for the format+type combo. */ 480 dst_format = st_choose_matching_format(st, bind, format, type, 481 pack->SwapBytes); 482 if (dst_format == PIPE_FORMAT_NONE) { 483 goto fallback; 484 } 485 486 if (st->pbo.download_enabled && pack->BufferObj) { 487 if (try_pbo_readpixels(st, strb, 488 st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP, 489 x, y, width, height, 490 format, src_format, dst_format, 491 pack, pixels)) 492 return; 493 } 494 495 if (needs_integer_signed_unsigned_conversion(ctx, format, type)) { 496 goto fallback; 497 } 498 499 /* Cache a staging texture for back-to-back ReadPixels, to avoid CPU-GPU 500 * synchronization overhead. 501 */ 502 dst = try_cached_readpixels(st, strb, 503 st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP, 504 width, height, format, src_format, dst_format); 505 if (dst) { 506 dst_x = x; 507 dst_y = y; 508 } else { 509 /* See if the texture format already matches the format and type, 510 * in which case the memcpy-based fast path will likely be used and 511 * we don't have to blit. */ 512 if (_mesa_format_matches_format_and_type(rb->Format, format, 513 type, pack->SwapBytes, NULL)) { 514 goto fallback; 515 } 516 517 dst = blit_to_staging(st, strb, 518 st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP, 519 x, y, width, height, format, 520 src_format, dst_format); 521 if (!dst) 522 goto fallback; 523 524 dst_x = 0; 525 dst_y = 0; 526 } 527 528 /* map resources */ 529 pixels = _mesa_map_pbo_dest(ctx, pack, pixels); 530 531 map = pipe_texture_map_3d(pipe, dst, 0, PIPE_MAP_READ, 532 dst_x, dst_y, 0, width, height, 1, &tex_xfer); 533 if (!map) { 534 _mesa_unmap_pbo_dest(ctx, pack); 535 pipe_resource_reference(&dst, NULL); 536 goto fallback; 537 } 538 539 /* memcpy data into a user buffer */ 540 { 541 const uint bytesPerRow = width * util_format_get_blocksize(dst_format); 542 const int destStride = _mesa_image_row_stride(pack, width, format, type); 543 char *dest = _mesa_image_address2d(pack, pixels, 544 width, height, format, 545 type, 0, 0); 546 547 if (tex_xfer->stride == bytesPerRow && destStride == bytesPerRow) { 548 memcpy(dest, map, bytesPerRow * height); 549 } else { 550 GLuint row; 551 552 for (row = 0; row < (unsigned) height; row++) { 553 memcpy(dest, map, bytesPerRow); 554 map += tex_xfer->stride; 555 dest += destStride; 556 } 557 } 558 } 559 560 pipe_texture_unmap(pipe, tex_xfer); 561 _mesa_unmap_pbo_dest(ctx, pack); 562 pipe_resource_reference(&dst, NULL); 563 return; 564 565fallback: 566 _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels); 567} 568 569void st_init_readpixels_functions(struct dd_function_table *functions) 570{ 571 functions->ReadPixels = st_ReadPixels; 572} 573