1/**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "main/bufferobj.h"
29#include "main/image.h"
30#include "main/pbo.h"
31
32#include "main/readpix.h"
33#include "main/enums.h"
34#include "main/framebuffer.h"
35#include "util/u_inlines.h"
36#include "util/format/u_format.h"
37#include "cso_cache/cso_context.h"
38
39#include "st_cb_fbo.h"
40#include "st_atom.h"
41#include "st_context.h"
42#include "st_cb_bitmap.h"
43#include "st_cb_readpixels.h"
44#include "st_debug.h"
45#include "state_tracker/st_cb_texture.h"
46#include "state_tracker/st_format.h"
47#include "state_tracker/st_pbo.h"
48#include "state_tracker/st_texture.h"
49#include "state_tracker/st_util.h"
50
51
52/* The readpixels cache caches a blitted staging texture so that back-to-back
53 * calls to glReadPixels with user pointers require less CPU-GPU synchronization.
54 *
55 * Assumptions:
56 *
57 * (1) Blits have high synchronization overheads, and it is beneficial to
58 *     use a single blit of the entire framebuffer instead of many smaller
59 *     blits (because the smaller blits cannot be batched, and we have to wait
60 *     for the GPU after each one).
61 *
62 * (2) texture_map implicitly involves a blit as well (for de-tiling, copy
63 *     from VRAM, etc.), so that it is beneficial to replace the
64 *     _mesa_readpixels path as well when possible.
65 *
66 * Change this #define to true to fill and use the cache whenever possible
67 * (this is inefficient and only meant for testing / debugging).
68 */
69#define ALWAYS_READPIXELS_CACHE false
70
71static boolean
72needs_integer_signed_unsigned_conversion(const struct gl_context *ctx,
73                                         GLenum format, GLenum type)
74{
75   struct gl_renderbuffer *rb =
76      _mesa_get_read_renderbuffer_for_format(ctx, format);
77
78   assert(rb);
79
80   GLenum srcType = _mesa_get_format_datatype(rb->Format);
81
82    if ((srcType == GL_INT &&
83        (type == GL_UNSIGNED_INT ||
84         type == GL_UNSIGNED_SHORT ||
85         type == GL_UNSIGNED_BYTE)) ||
86       (srcType == GL_UNSIGNED_INT &&
87        (type == GL_INT ||
88         type == GL_SHORT ||
89         type == GL_BYTE))) {
90      return TRUE;
91   }
92
93   return FALSE;
94}
95
96static bool
97try_pbo_readpixels(struct st_context *st, struct st_renderbuffer *strb,
98                   bool invert_y,
99                   GLint x, GLint y, GLsizei width, GLsizei height,
100                   GLenum gl_format,
101                   enum pipe_format src_format, enum pipe_format dst_format,
102                   const struct gl_pixelstore_attrib *pack, void *pixels)
103{
104   struct pipe_context *pipe = st->pipe;
105   struct pipe_screen *screen = st->screen;
106   struct cso_context *cso = st->cso_context;
107   struct pipe_surface *surface = strb->surface;
108   struct pipe_resource *texture = strb->texture;
109   const struct util_format_description *desc;
110   struct st_pbo_addresses addr;
111   struct pipe_framebuffer_state fb;
112   enum pipe_texture_target view_target;
113   bool success = false;
114
115   /* Make sure we have stencil format in case of GL_STENCIL_INDEX to
116    * create correct type of a sampler view.
117    */
118   if (gl_format == GL_STENCIL_INDEX)
119      src_format = util_format_stencil_only(src_format);
120
121   if (texture->nr_samples > 1)
122      return false;
123
124   if (!screen->is_format_supported(screen, dst_format, PIPE_BUFFER, 0, 0,
125                                    PIPE_BIND_SHADER_IMAGE))
126      return false;
127
128   desc = util_format_description(dst_format);
129
130   /* Compute PBO addresses */
131   addr.bytes_per_pixel = desc->block.bits / 8;
132   addr.xoffset = x;
133   addr.yoffset = y;
134   addr.width = width;
135   addr.height = height;
136   addr.depth = 1;
137   if (!st_pbo_addresses_pixelstore(st, GL_TEXTURE_2D, false, pack, pixels, &addr))
138      return false;
139
140   cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLERS |
141                        CSO_BIT_BLEND |
142                        CSO_BIT_VERTEX_ELEMENTS |
143                        CSO_BIT_FRAMEBUFFER |
144                        CSO_BIT_VIEWPORT |
145                        CSO_BIT_RASTERIZER |
146                        CSO_BIT_DEPTH_STENCIL_ALPHA |
147                        CSO_BIT_STREAM_OUTPUTS |
148                        (st->active_queries ? CSO_BIT_PAUSE_QUERIES : 0) |
149                        CSO_BIT_SAMPLE_MASK |
150                        CSO_BIT_MIN_SAMPLES |
151                        CSO_BIT_RENDER_CONDITION |
152                        CSO_BITS_ALL_SHADERS));
153
154   cso_set_sample_mask(cso, ~0);
155   cso_set_min_samples(cso, 1);
156   cso_set_render_condition(cso, NULL, FALSE, 0);
157
158   /* Set up the sampler_view */
159   {
160      struct pipe_sampler_view templ;
161      struct pipe_sampler_view *sampler_view;
162      struct pipe_sampler_state sampler = {0};
163      const struct pipe_sampler_state *samplers[1] = {&sampler};
164
165      u_sampler_view_default_template(&templ, texture, src_format);
166
167      switch (texture->target) {
168      case PIPE_TEXTURE_CUBE:
169      case PIPE_TEXTURE_CUBE_ARRAY:
170         view_target = PIPE_TEXTURE_2D_ARRAY;
171         break;
172      default:
173         view_target = texture->target;
174         break;
175      }
176
177      templ.target = view_target;
178      templ.u.tex.first_level = surface->u.tex.level;
179      templ.u.tex.last_level = templ.u.tex.first_level;
180
181      if (view_target != PIPE_TEXTURE_3D) {
182         templ.u.tex.first_layer = surface->u.tex.first_layer;
183         templ.u.tex.last_layer = templ.u.tex.first_layer;
184      } else {
185         addr.constants.layer_offset = surface->u.tex.first_layer;
186      }
187
188      sampler_view = pipe->create_sampler_view(pipe, texture, &templ);
189      if (sampler_view == NULL)
190         goto fail;
191
192      pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0,
193                              false, &sampler_view);
194      st->state.num_sampler_views[PIPE_SHADER_FRAGMENT] =
195         MAX2(st->state.num_sampler_views[PIPE_SHADER_FRAGMENT], 1);
196
197      pipe_sampler_view_reference(&sampler_view, NULL);
198
199      cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers);
200   }
201
202   /* Set up destination image */
203   {
204      struct pipe_image_view image;
205
206      memset(&image, 0, sizeof(image));
207      image.resource = addr.buffer;
208      image.format = dst_format;
209      image.access = PIPE_IMAGE_ACCESS_WRITE;
210      image.shader_access = PIPE_IMAGE_ACCESS_WRITE;
211      image.u.buf.offset = addr.first_element * addr.bytes_per_pixel;
212      image.u.buf.size = (addr.last_element - addr.first_element + 1) *
213                         addr.bytes_per_pixel;
214
215      pipe->set_shader_images(pipe, PIPE_SHADER_FRAGMENT, 0, 1, 0, &image);
216   }
217
218   /* Set up no-attachment framebuffer */
219   memset(&fb, 0, sizeof(fb));
220   fb.width = surface->width;
221   fb.height = surface->height;
222   fb.samples = 1;
223   fb.layers = 1;
224   cso_set_framebuffer(cso, &fb);
225
226   /* Any blend state would do. Set this just to prevent drivers having
227    * blend == NULL.
228    */
229   cso_set_blend(cso, &st->pbo.upload_blend);
230
231   cso_set_viewport_dims(cso, fb.width, fb.height, invert_y);
232
233   if (invert_y)
234      st_pbo_addresses_invert_y(&addr, fb.height);
235
236   {
237      struct pipe_depth_stencil_alpha_state dsa;
238      memset(&dsa, 0, sizeof(dsa));
239      cso_set_depth_stencil_alpha(cso, &dsa);
240   }
241
242   /* Set up the fragment shader */
243   {
244      void *fs = st_pbo_get_download_fs(st, view_target, src_format, dst_format, addr.depth != 1);
245      if (!fs)
246         goto fail;
247
248      cso_set_fragment_shader_handle(cso, fs);
249   }
250
251   success = st_pbo_draw(st, &addr, fb.width, fb.height);
252
253   /* Buffer written via shader images needs explicit synchronization. */
254   pipe->memory_barrier(pipe, PIPE_BARRIER_ALL);
255
256fail:
257   /* Unbind all because st/mesa won't do it if the current shader doesn't
258    * use them.
259    */
260   cso_restore_state(cso, CSO_UNBIND_FS_SAMPLERVIEWS | CSO_UNBIND_FS_IMAGE0);
261   st->state.num_sampler_views[PIPE_SHADER_FRAGMENT] = 0;
262
263   st->dirty |= ST_NEW_FS_CONSTANTS |
264                ST_NEW_FS_IMAGES |
265                ST_NEW_FS_SAMPLER_VIEWS |
266                ST_NEW_VERTEX_ARRAYS;
267
268   return success;
269}
270
271/**
272 * Create a staging texture and blit the requested region to it.
273 */
274static struct pipe_resource *
275blit_to_staging(struct st_context *st, struct st_renderbuffer *strb,
276                   bool invert_y,
277                   GLint x, GLint y, GLsizei width, GLsizei height,
278                   GLenum format,
279                   enum pipe_format src_format, enum pipe_format dst_format)
280{
281   struct pipe_screen *screen = st->screen;
282   struct pipe_resource dst_templ;
283   struct pipe_resource *dst;
284   struct pipe_blit_info blit;
285
286   /* We are creating a texture of the size of the region being read back.
287    * Need to check for NPOT texture support. */
288   if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
289       (!util_is_power_of_two_or_zero(width) ||
290        !util_is_power_of_two_or_zero(height)))
291      return NULL;
292
293   /* create the destination texture */
294   memset(&dst_templ, 0, sizeof(dst_templ));
295   dst_templ.target = PIPE_TEXTURE_2D;
296   dst_templ.format = dst_format;
297   if (util_format_is_depth_or_stencil(dst_format))
298      dst_templ.bind |= PIPE_BIND_DEPTH_STENCIL;
299   else
300      dst_templ.bind |= PIPE_BIND_RENDER_TARGET;
301   dst_templ.usage = PIPE_USAGE_STAGING;
302
303   st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1,
304                                   &dst_templ.width0, &dst_templ.height0,
305                                   &dst_templ.depth0, &dst_templ.array_size);
306
307   dst = screen->resource_create(screen, &dst_templ);
308   if (!dst)
309      return NULL;
310
311   memset(&blit, 0, sizeof(blit));
312   blit.src.resource = strb->texture;
313   blit.src.level = strb->surface->u.tex.level;
314   blit.src.format = src_format;
315   blit.dst.resource = dst;
316   blit.dst.level = 0;
317   blit.dst.format = dst->format;
318   blit.src.box.x = x;
319   blit.dst.box.x = 0;
320   blit.src.box.y = y;
321   blit.dst.box.y = 0;
322   blit.src.box.z = strb->surface->u.tex.first_layer;
323   blit.dst.box.z = 0;
324   blit.src.box.width = blit.dst.box.width = width;
325   blit.src.box.height = blit.dst.box.height = height;
326   blit.src.box.depth = blit.dst.box.depth = 1;
327   blit.mask = st_get_blit_mask(strb->Base._BaseFormat, format);
328   blit.filter = PIPE_TEX_FILTER_NEAREST;
329   blit.scissor_enable = FALSE;
330
331   if (invert_y) {
332      blit.src.box.y = strb->Base.Height - blit.src.box.y;
333      blit.src.box.height = -blit.src.box.height;
334   }
335
336   /* blit */
337   st->pipe->blit(st->pipe, &blit);
338
339   return dst;
340}
341
342static struct pipe_resource *
343try_cached_readpixels(struct st_context *st, struct st_renderbuffer *strb,
344                      bool invert_y,
345                      GLsizei width, GLsizei height,
346                      GLenum format,
347                      enum pipe_format src_format, enum pipe_format dst_format)
348{
349   struct pipe_resource *src = strb->texture;
350   struct pipe_resource *dst = NULL;
351
352   if (ST_DEBUG & DEBUG_NOREADPIXCACHE)
353      return NULL;
354
355   /* Reset cache after invalidation or switch of parameters. */
356   if (st->readpix_cache.src != src ||
357       st->readpix_cache.dst_format != dst_format ||
358       st->readpix_cache.level != strb->surface->u.tex.level ||
359       st->readpix_cache.layer != strb->surface->u.tex.first_layer) {
360      pipe_resource_reference(&st->readpix_cache.src, src);
361      pipe_resource_reference(&st->readpix_cache.cache, NULL);
362      st->readpix_cache.dst_format = dst_format;
363      st->readpix_cache.level = strb->surface->u.tex.level;
364      st->readpix_cache.layer = strb->surface->u.tex.first_layer;
365      st->readpix_cache.hits = 0;
366   }
367
368   /* Decide whether to trigger the cache. */
369   if (!st->readpix_cache.cache) {
370      if (!strb->use_readpix_cache && !ALWAYS_READPIXELS_CACHE) {
371         /* Heuristic: If previous successive calls read at least a fraction
372          * of the surface _and_ we read again, trigger the cache.
373          */
374         unsigned threshold = MAX2(1, strb->Base.Width * strb->Base.Height / 8);
375
376         if (st->readpix_cache.hits < threshold) {
377            st->readpix_cache.hits += width * height;
378            return NULL;
379         }
380
381         strb->use_readpix_cache = true;
382      }
383
384      /* Fill the cache */
385      st->readpix_cache.cache = blit_to_staging(st, strb, invert_y,
386                                                0, 0,
387                                                strb->Base.Width,
388                                                strb->Base.Height, format,
389                                                src_format, dst_format);
390   }
391
392   /* Return an owning reference to stay consistent with the non-cached path */
393   pipe_resource_reference(&dst, st->readpix_cache.cache);
394
395   return dst;
396}
397
398/**
399 * This uses a blit to copy the read buffer to a texture format which matches
400 * the format and type combo and then a fast read-back is done using memcpy.
401 * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is
402 * a format which matches the swizzling.
403 *
404 * If such a format isn't available, we fall back to _mesa_readpixels.
405 *
406 * NOTE: Some drivers use a blit to convert between tiled and linear
407 *       texture layouts during texture uploads/downloads, so the blit
408 *       we do here should be free in such cases.
409 */
410static void
411st_ReadPixels(struct gl_context *ctx, GLint x, GLint y,
412              GLsizei width, GLsizei height,
413              GLenum format, GLenum type,
414              const struct gl_pixelstore_attrib *pack,
415              void *pixels)
416{
417   struct st_context *st = st_context(ctx);
418   struct gl_renderbuffer *rb =
419         _mesa_get_read_renderbuffer_for_format(ctx, format);
420   struct st_renderbuffer *strb = st_renderbuffer(rb);
421   struct pipe_context *pipe = st->pipe;
422   struct pipe_screen *screen = st->screen;
423   struct pipe_resource *src;
424   struct pipe_resource *dst = NULL;
425   enum pipe_format dst_format, src_format;
426   unsigned bind;
427   struct pipe_transfer *tex_xfer;
428   ubyte *map = NULL;
429   int dst_x, dst_y;
430
431   /* Validate state (to be sure we have up-to-date framebuffer surfaces)
432    * and flush the bitmap cache prior to reading. */
433   st_validate_state(st, ST_PIPELINE_UPDATE_FRAMEBUFFER);
434   st_flush_bitmap_cache(st);
435
436   if (!st->prefer_blit_based_texture_transfer) {
437      goto fallback;
438   }
439
440   /* This must be done after state validation. */
441   src = strb->texture;
442
443   /* XXX Fallback for depth-stencil formats due to an incomplete
444    * stencil blit implementation in some drivers. */
445   if (format == GL_DEPTH_STENCIL) {
446      goto fallback;
447   }
448
449   /* If the base internal format and the texture format don't match, we have
450    * to use the slow path. */
451   if (rb->_BaseFormat !=
452       _mesa_get_format_base_format(rb->Format)) {
453      goto fallback;
454   }
455
456   if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) {
457      goto fallback;
458   }
459
460   /* Convert the source format to what is expected by ReadPixels
461    * and see if it's supported. */
462   src_format = util_format_linear(strb->Base.Format);
463   src_format = util_format_luminance_to_red(src_format);
464   src_format = util_format_intensity_to_red(src_format);
465
466   if (!src_format ||
467       !screen->is_format_supported(screen, src_format, src->target,
468                                    src->nr_samples, src->nr_storage_samples,
469                                    PIPE_BIND_SAMPLER_VIEW)) {
470      goto fallback;
471   }
472
473   if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
474      bind = PIPE_BIND_DEPTH_STENCIL;
475   else
476      bind = PIPE_BIND_RENDER_TARGET;
477
478   /* Choose the destination format by finding the best match
479    * for the format+type combo. */
480   dst_format = st_choose_matching_format(st, bind, format, type,
481                                          pack->SwapBytes);
482   if (dst_format == PIPE_FORMAT_NONE) {
483      goto fallback;
484   }
485
486   if (st->pbo.download_enabled && pack->BufferObj) {
487      if (try_pbo_readpixels(st, strb,
488                             st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
489                             x, y, width, height,
490                             format, src_format, dst_format,
491                             pack, pixels))
492         return;
493   }
494
495   if (needs_integer_signed_unsigned_conversion(ctx, format, type)) {
496      goto fallback;
497   }
498
499   /* Cache a staging texture for back-to-back ReadPixels, to avoid CPU-GPU
500    * synchronization overhead.
501    */
502   dst = try_cached_readpixels(st, strb,
503                               st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
504                               width, height, format, src_format, dst_format);
505   if (dst) {
506      dst_x = x;
507      dst_y = y;
508   } else {
509      /* See if the texture format already matches the format and type,
510       * in which case the memcpy-based fast path will likely be used and
511       * we don't have to blit. */
512      if (_mesa_format_matches_format_and_type(rb->Format, format,
513                                               type, pack->SwapBytes, NULL)) {
514         goto fallback;
515      }
516
517      dst = blit_to_staging(st, strb,
518                            st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
519                            x, y, width, height, format,
520                            src_format, dst_format);
521      if (!dst)
522         goto fallback;
523
524      dst_x = 0;
525      dst_y = 0;
526   }
527
528   /* map resources */
529   pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
530
531   map = pipe_texture_map_3d(pipe, dst, 0, PIPE_MAP_READ,
532                              dst_x, dst_y, 0, width, height, 1, &tex_xfer);
533   if (!map) {
534      _mesa_unmap_pbo_dest(ctx, pack);
535      pipe_resource_reference(&dst, NULL);
536      goto fallback;
537   }
538
539   /* memcpy data into a user buffer */
540   {
541      const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
542      const int destStride = _mesa_image_row_stride(pack, width, format, type);
543      char *dest = _mesa_image_address2d(pack, pixels,
544                                         width, height, format,
545                                         type, 0, 0);
546
547      if (tex_xfer->stride == bytesPerRow && destStride == bytesPerRow) {
548         memcpy(dest, map, bytesPerRow * height);
549      } else {
550         GLuint row;
551
552         for (row = 0; row < (unsigned) height; row++) {
553            memcpy(dest, map, bytesPerRow);
554            map += tex_xfer->stride;
555            dest += destStride;
556         }
557      }
558   }
559
560   pipe_texture_unmap(pipe, tex_xfer);
561   _mesa_unmap_pbo_dest(ctx, pack);
562   pipe_resource_reference(&dst, NULL);
563   return;
564
565fallback:
566   _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
567}
568
569void st_init_readpixels_functions(struct dd_function_table *functions)
570{
571   functions->ReadPixels = st_ReadPixels;
572}
573