101e04c3fSmrg/*
201e04c3fSmrg * Copyright 2007 VMware, Inc.
301e04c3fSmrg * Copyright 2016 Advanced Micro Devices, Inc.
401e04c3fSmrg *
501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
601e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
701e04c3fSmrg * to deal in the Software without restriction, including without limitation
801e04c3fSmrg * on the rights to use, copy, modify, merge, publish, distribute, sub
901e04c3fSmrg * license, and/or sell copies of the Software, and to permit persons to whom
1001e04c3fSmrg * the Software is furnished to do so, subject to the following conditions:
1101e04c3fSmrg *
1201e04c3fSmrg * The above copyright notice and this permission notice (including the next
1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1401e04c3fSmrg * Software.
1501e04c3fSmrg *
1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
1901e04c3fSmrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
2001e04c3fSmrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
2101e04c3fSmrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
2201e04c3fSmrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
2301e04c3fSmrg */
2401e04c3fSmrg
2501e04c3fSmrg/**
2601e04c3fSmrg * \file
2701e04c3fSmrg *
2801e04c3fSmrg * Common helper functions for PBO up- and downloads.
2901e04c3fSmrg */
3001e04c3fSmrg
3101e04c3fSmrg#include "state_tracker/st_context.h"
32b9abf16eSmaya#include "state_tracker/st_nir.h"
3301e04c3fSmrg#include "state_tracker/st_pbo.h"
3401e04c3fSmrg#include "state_tracker/st_cb_bufferobjects.h"
3501e04c3fSmrg
3601e04c3fSmrg#include "pipe/p_context.h"
3701e04c3fSmrg#include "pipe/p_defines.h"
3801e04c3fSmrg#include "pipe/p_screen.h"
3901e04c3fSmrg#include "cso_cache/cso_context.h"
4001e04c3fSmrg#include "tgsi/tgsi_ureg.h"
417ec681f3Smrg#include "util/format/u_format.h"
4201e04c3fSmrg#include "util/u_inlines.h"
4301e04c3fSmrg#include "util/u_upload_mgr.h"
4401e04c3fSmrg
45b9abf16eSmaya#include "compiler/nir/nir_builder.h"
46b9abf16eSmaya
/* Conversion applied by the PBO fragment shader between the fetched texel
 * and the value it writes out.  The values are used as array indices for
 * the per-conversion lookup tables and shader caches in this file.
 */
enum st_pbo_conversion {
   /* Neither pure-integer pairing below applies. */
   ST_PBO_CONVERT_FLOAT = 0,

   /* Pure unsigned integer source and destination. */
   ST_PBO_CONVERT_UINT,

   /* Pure signed integer source and destination. */
   ST_PBO_CONVERT_SINT,

   /* Pure unsigned source, pure signed destination. */
   ST_PBO_CONVERT_UINT_TO_SINT,

   /* Pure signed source, pure unsigned destination. */
   ST_PBO_CONVERT_SINT_TO_UINT,

   /* Number of conversions above; must stay last. */
   ST_NUM_PBO_CONVERSIONS
};
5701e04c3fSmrg
5801e04c3fSmrg/* Final setup of buffer addressing information.
5901e04c3fSmrg *
6001e04c3fSmrg * buf_offset is in pixels.
6101e04c3fSmrg *
6201e04c3fSmrg * Returns false if something (e.g. alignment) prevents PBO upload/download.
6301e04c3fSmrg */
6401e04c3fSmrgbool
6501e04c3fSmrgst_pbo_addresses_setup(struct st_context *st,
6601e04c3fSmrg                       struct pipe_resource *buf, intptr_t buf_offset,
6701e04c3fSmrg                       struct st_pbo_addresses *addr)
6801e04c3fSmrg{
6901e04c3fSmrg   unsigned skip_pixels;
7001e04c3fSmrg
7101e04c3fSmrg   /* Check alignment against texture buffer requirements. */
7201e04c3fSmrg   {
7301e04c3fSmrg      unsigned ofs = (buf_offset * addr->bytes_per_pixel) % st->ctx->Const.TextureBufferOffsetAlignment;
7401e04c3fSmrg      if (ofs != 0) {
7501e04c3fSmrg         if (ofs % addr->bytes_per_pixel != 0)
7601e04c3fSmrg            return false;
7701e04c3fSmrg
7801e04c3fSmrg         skip_pixels = ofs / addr->bytes_per_pixel;
7901e04c3fSmrg         buf_offset -= skip_pixels;
8001e04c3fSmrg      } else {
8101e04c3fSmrg         skip_pixels = 0;
8201e04c3fSmrg      }
8301e04c3fSmrg   }
8401e04c3fSmrg
8501e04c3fSmrg   assert(buf_offset >= 0);
8601e04c3fSmrg
8701e04c3fSmrg   addr->buffer = buf;
8801e04c3fSmrg   addr->first_element = buf_offset;
8901e04c3fSmrg   addr->last_element = buf_offset + skip_pixels + addr->width - 1
9001e04c3fSmrg         + (addr->height - 1 + (addr->depth - 1) * addr->image_height) * addr->pixels_per_row;
9101e04c3fSmrg
9201e04c3fSmrg   if (addr->last_element - addr->first_element > st->ctx->Const.MaxTextureBufferSize - 1)
9301e04c3fSmrg      return false;
9401e04c3fSmrg
9501e04c3fSmrg   /* This should be ensured by Mesa before calling our callbacks */
9601e04c3fSmrg   assert((addr->last_element + 1) * addr->bytes_per_pixel <= buf->width0);
9701e04c3fSmrg
9801e04c3fSmrg   addr->constants.xoffset = -addr->xoffset + skip_pixels;
9901e04c3fSmrg   addr->constants.yoffset = -addr->yoffset;
10001e04c3fSmrg   addr->constants.stride = addr->pixels_per_row;
10101e04c3fSmrg   addr->constants.image_size = addr->pixels_per_row * addr->image_height;
10201e04c3fSmrg   addr->constants.layer_offset = 0;
10301e04c3fSmrg
10401e04c3fSmrg   return true;
10501e04c3fSmrg}
10601e04c3fSmrg
10701e04c3fSmrg/* Validate and fill buffer addressing information based on GL pixelstore
10801e04c3fSmrg * attributes.
10901e04c3fSmrg *
11001e04c3fSmrg * Returns false if some aspect of the addressing (e.g. alignment) prevents
11101e04c3fSmrg * PBO upload/download.
11201e04c3fSmrg */
11301e04c3fSmrgbool
11401e04c3fSmrgst_pbo_addresses_pixelstore(struct st_context *st,
11501e04c3fSmrg                            GLenum gl_target, bool skip_images,
11601e04c3fSmrg                            const struct gl_pixelstore_attrib *store,
11701e04c3fSmrg                            const void *pixels,
11801e04c3fSmrg                            struct st_pbo_addresses *addr)
11901e04c3fSmrg{
12001e04c3fSmrg   struct pipe_resource *buf = st_buffer_object(store->BufferObj)->buffer;
12101e04c3fSmrg   intptr_t buf_offset = (intptr_t) pixels;
12201e04c3fSmrg
12301e04c3fSmrg   if (buf_offset % addr->bytes_per_pixel)
12401e04c3fSmrg      return false;
12501e04c3fSmrg
12601e04c3fSmrg   /* Convert to texels */
12701e04c3fSmrg   buf_offset = buf_offset / addr->bytes_per_pixel;
12801e04c3fSmrg
12901e04c3fSmrg   /* Determine image height */
13001e04c3fSmrg   if (gl_target == GL_TEXTURE_1D_ARRAY) {
13101e04c3fSmrg      addr->image_height = 1;
13201e04c3fSmrg   } else {
13301e04c3fSmrg      addr->image_height = store->ImageHeight > 0 ? store->ImageHeight : addr->height;
13401e04c3fSmrg   }
13501e04c3fSmrg
13601e04c3fSmrg   /* Compute the stride, taking store->Alignment into account */
13701e04c3fSmrg   {
13801e04c3fSmrg       unsigned pixels_per_row = store->RowLength > 0 ?
13901e04c3fSmrg                           store->RowLength : addr->width;
14001e04c3fSmrg       unsigned bytes_per_row = pixels_per_row * addr->bytes_per_pixel;
14101e04c3fSmrg       unsigned remainder = bytes_per_row % store->Alignment;
14201e04c3fSmrg       unsigned offset_rows;
14301e04c3fSmrg
14401e04c3fSmrg       if (remainder > 0)
14501e04c3fSmrg          bytes_per_row += store->Alignment - remainder;
14601e04c3fSmrg
14701e04c3fSmrg       if (bytes_per_row % addr->bytes_per_pixel)
14801e04c3fSmrg          return false;
14901e04c3fSmrg
15001e04c3fSmrg       addr->pixels_per_row = bytes_per_row / addr->bytes_per_pixel;
15101e04c3fSmrg
15201e04c3fSmrg       offset_rows = store->SkipRows;
15301e04c3fSmrg       if (skip_images)
15401e04c3fSmrg          offset_rows += addr->image_height * store->SkipImages;
15501e04c3fSmrg
15601e04c3fSmrg       buf_offset += store->SkipPixels + addr->pixels_per_row * offset_rows;
15701e04c3fSmrg   }
15801e04c3fSmrg
15901e04c3fSmrg   if (!st_pbo_addresses_setup(st, buf, buf_offset, addr))
16001e04c3fSmrg      return false;
16101e04c3fSmrg
16201e04c3fSmrg   /* Support GL_PACK_INVERT_MESA */
16301e04c3fSmrg   if (store->Invert) {
16401e04c3fSmrg      addr->constants.xoffset += (addr->height - 1) * addr->constants.stride;
16501e04c3fSmrg      addr->constants.stride = -addr->constants.stride;
16601e04c3fSmrg   }
16701e04c3fSmrg
16801e04c3fSmrg   return true;
16901e04c3fSmrg}
17001e04c3fSmrg
17101e04c3fSmrg/* For download from a framebuffer, we may have to invert the Y axis. The
17201e04c3fSmrg * setup is as follows:
17301e04c3fSmrg * - set viewport to inverted, so that the position sysval is correct for
17401e04c3fSmrg *   texel fetches
17501e04c3fSmrg * - this function adjusts the fragment shader's constant buffer to compute
17601e04c3fSmrg *   the correct destination addresses.
17701e04c3fSmrg */
17801e04c3fSmrgvoid
17901e04c3fSmrgst_pbo_addresses_invert_y(struct st_pbo_addresses *addr,
18001e04c3fSmrg                          unsigned viewport_height)
18101e04c3fSmrg{
18201e04c3fSmrg   addr->constants.xoffset +=
18301e04c3fSmrg      (viewport_height - 1 + 2 * addr->constants.yoffset) * addr->constants.stride;
18401e04c3fSmrg   addr->constants.stride = -addr->constants.stride;
18501e04c3fSmrg}
18601e04c3fSmrg
18701e04c3fSmrg/* Setup all vertex pipeline state, rasterizer state, and fragment shader
18801e04c3fSmrg * constants, and issue the draw call for PBO upload/download.
18901e04c3fSmrg *
19001e04c3fSmrg * The caller is responsible for saving and restoring state, as well as for
19101e04c3fSmrg * setting other fragment shader state (fragment shader, samplers), and
19201e04c3fSmrg * framebuffer/viewport/DSA/blend state.
19301e04c3fSmrg */
19401e04c3fSmrgbool
19501e04c3fSmrgst_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr,
19601e04c3fSmrg            unsigned surface_width, unsigned surface_height)
19701e04c3fSmrg{
19801e04c3fSmrg   struct cso_context *cso = st->cso_context;
1997ec681f3Smrg   struct pipe_context *pipe = st->pipe;
20001e04c3fSmrg
20101e04c3fSmrg   /* Setup vertex and geometry shaders */
20201e04c3fSmrg   if (!st->pbo.vs) {
20301e04c3fSmrg      st->pbo.vs = st_pbo_create_vs(st);
20401e04c3fSmrg      if (!st->pbo.vs)
20501e04c3fSmrg         return false;
20601e04c3fSmrg   }
20701e04c3fSmrg
20801e04c3fSmrg   if (addr->depth != 1 && st->pbo.use_gs && !st->pbo.gs) {
20901e04c3fSmrg      st->pbo.gs = st_pbo_create_gs(st);
21001e04c3fSmrg      if (!st->pbo.gs)
21101e04c3fSmrg         return false;
21201e04c3fSmrg   }
21301e04c3fSmrg
21401e04c3fSmrg   cso_set_vertex_shader_handle(cso, st->pbo.vs);
21501e04c3fSmrg
21601e04c3fSmrg   cso_set_geometry_shader_handle(cso, addr->depth != 1 ? st->pbo.gs : NULL);
21701e04c3fSmrg
21801e04c3fSmrg   cso_set_tessctrl_shader_handle(cso, NULL);
21901e04c3fSmrg
22001e04c3fSmrg   cso_set_tesseval_shader_handle(cso, NULL);
22101e04c3fSmrg
22201e04c3fSmrg   /* Upload vertices */
22301e04c3fSmrg   {
22401e04c3fSmrg      struct pipe_vertex_buffer vbo = {0};
2257ec681f3Smrg      struct cso_velems_state velem;
22601e04c3fSmrg
22701e04c3fSmrg      float x0 = (float) addr->xoffset / surface_width * 2.0f - 1.0f;
22801e04c3fSmrg      float y0 = (float) addr->yoffset / surface_height * 2.0f - 1.0f;
22901e04c3fSmrg      float x1 = (float) (addr->xoffset + addr->width) / surface_width * 2.0f - 1.0f;
23001e04c3fSmrg      float y1 = (float) (addr->yoffset + addr->height) / surface_height * 2.0f - 1.0f;
23101e04c3fSmrg
23201e04c3fSmrg      float *verts = NULL;
23301e04c3fSmrg
23401e04c3fSmrg      vbo.stride = 2 * sizeof(float);
23501e04c3fSmrg
23601e04c3fSmrg      u_upload_alloc(st->pipe->stream_uploader, 0, 8 * sizeof(float), 4,
23701e04c3fSmrg                     &vbo.buffer_offset, &vbo.buffer.resource, (void **) &verts);
23801e04c3fSmrg      if (!verts)
23901e04c3fSmrg         return false;
24001e04c3fSmrg
24101e04c3fSmrg      verts[0] = x0;
24201e04c3fSmrg      verts[1] = y0;
24301e04c3fSmrg      verts[2] = x0;
24401e04c3fSmrg      verts[3] = y1;
24501e04c3fSmrg      verts[4] = x1;
24601e04c3fSmrg      verts[5] = y0;
24701e04c3fSmrg      verts[6] = x1;
24801e04c3fSmrg      verts[7] = y1;
24901e04c3fSmrg
25001e04c3fSmrg      u_upload_unmap(st->pipe->stream_uploader);
25101e04c3fSmrg
2527ec681f3Smrg      velem.count = 1;
2537ec681f3Smrg      velem.velems[0].src_offset = 0;
2547ec681f3Smrg      velem.velems[0].instance_divisor = 0;
2557ec681f3Smrg      velem.velems[0].vertex_buffer_index = 0;
2567ec681f3Smrg      velem.velems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
2577ec681f3Smrg      velem.velems[0].dual_slot = false;
25801e04c3fSmrg
2597ec681f3Smrg      cso_set_vertex_elements(cso, &velem);
26001e04c3fSmrg
2617ec681f3Smrg      cso_set_vertex_buffers(cso, 0, 1, &vbo);
2627ec681f3Smrg      st->last_num_vbuffers = MAX2(st->last_num_vbuffers, 1);
26301e04c3fSmrg
26401e04c3fSmrg      pipe_resource_reference(&vbo.buffer.resource, NULL);
26501e04c3fSmrg   }
26601e04c3fSmrg
26701e04c3fSmrg   /* Upload constants */
26801e04c3fSmrg   {
26901e04c3fSmrg      struct pipe_constant_buffer cb;
27001e04c3fSmrg
27101e04c3fSmrg      cb.buffer = NULL;
27201e04c3fSmrg      cb.user_buffer = &addr->constants;
27301e04c3fSmrg      cb.buffer_offset = 0;
27401e04c3fSmrg      cb.buffer_size = sizeof(addr->constants);
27501e04c3fSmrg
2767ec681f3Smrg      pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, false, &cb);
27701e04c3fSmrg
27801e04c3fSmrg      pipe_resource_reference(&cb.buffer, NULL);
27901e04c3fSmrg   }
28001e04c3fSmrg
28101e04c3fSmrg   /* Rasterizer state */
28201e04c3fSmrg   cso_set_rasterizer(cso, &st->pbo.raster);
28301e04c3fSmrg
28401e04c3fSmrg   /* Disable stream output */
28501e04c3fSmrg   cso_set_stream_outputs(cso, 0, NULL, 0);
28601e04c3fSmrg
28701e04c3fSmrg   if (addr->depth == 1) {
28801e04c3fSmrg      cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
28901e04c3fSmrg   } else {
29001e04c3fSmrg      cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP,
29101e04c3fSmrg                                0, 4, 0, addr->depth);
29201e04c3fSmrg   }
29301e04c3fSmrg
29401e04c3fSmrg   return true;
29501e04c3fSmrg}
29601e04c3fSmrg
29701e04c3fSmrgvoid *
29801e04c3fSmrgst_pbo_create_vs(struct st_context *st)
29901e04c3fSmrg{
3007ec681f3Smrg   const struct glsl_type *vec4 = glsl_vec4_type();
3017ec681f3Smrg   const nir_shader_compiler_options *options =
3027ec681f3Smrg      st_get_nir_compiler_options(st, MESA_SHADER_VERTEX);
303b9abf16eSmaya
3047ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
3057ec681f3Smrg                                                  "st/pbo VS");
30601e04c3fSmrg
3077ec681f3Smrg   nir_variable *in_pos = nir_variable_create(b.shader, nir_var_shader_in,
3087ec681f3Smrg                                              vec4, "in_pos");
3097ec681f3Smrg   in_pos->data.location = VERT_ATTRIB_POS;
31001e04c3fSmrg
3117ec681f3Smrg   nir_variable *out_pos = nir_variable_create(b.shader, nir_var_shader_out,
3127ec681f3Smrg                                               vec4, "out_pos");
3137ec681f3Smrg   out_pos->data.location = VARYING_SLOT_POS;
3147ec681f3Smrg   out_pos->data.interpolation = INTERP_MODE_NONE;
31501e04c3fSmrg
3167ec681f3Smrg   nir_copy_var(&b, out_pos, in_pos);
31701e04c3fSmrg
31801e04c3fSmrg   if (st->pbo.layers) {
3197ec681f3Smrg      nir_variable *instance_id = nir_variable_create(b.shader,
3207ec681f3Smrg                                                      nir_var_system_value,
3217ec681f3Smrg                                                      glsl_int_type(),
3227ec681f3Smrg                                                      "instance_id");
3237ec681f3Smrg      instance_id->data.location = SYSTEM_VALUE_INSTANCE_ID;
32401e04c3fSmrg
32501e04c3fSmrg      if (st->pbo.use_gs) {
3267ec681f3Smrg         unsigned swiz_x[4] = {0, 0, 0, 0};
3277ec681f3Smrg         nir_store_var(&b, out_pos,
3287ec681f3Smrg                       nir_swizzle(&b, nir_i2f32(&b, nir_load_var(&b, instance_id)), swiz_x, 4),
3297ec681f3Smrg                       (1 << 2));
33001e04c3fSmrg      } else {
3317ec681f3Smrg         nir_variable *out_layer = nir_variable_create(b.shader,
3327ec681f3Smrg                                                     nir_var_shader_out,
3337ec681f3Smrg                                                     glsl_int_type(),
3347ec681f3Smrg                                                     "out_layer");
3357ec681f3Smrg         out_layer->data.location = VARYING_SLOT_LAYER;
3367ec681f3Smrg         out_layer->data.interpolation = INTERP_MODE_NONE;
3377ec681f3Smrg         nir_copy_var(&b, out_layer, instance_id);
33801e04c3fSmrg      }
33901e04c3fSmrg   }
34001e04c3fSmrg
3417ec681f3Smrg   return st_nir_finish_builtin_shader(st, b.shader);
34201e04c3fSmrg}
34301e04c3fSmrg
34401e04c3fSmrgvoid *
34501e04c3fSmrgst_pbo_create_gs(struct st_context *st)
34601e04c3fSmrg{
34701e04c3fSmrg   static const int zero = 0;
34801e04c3fSmrg   struct ureg_program *ureg;
34901e04c3fSmrg   struct ureg_dst out_pos;
35001e04c3fSmrg   struct ureg_dst out_layer;
35101e04c3fSmrg   struct ureg_src in_pos;
35201e04c3fSmrg   struct ureg_src imm;
35301e04c3fSmrg   unsigned i;
35401e04c3fSmrg
35501e04c3fSmrg   ureg = ureg_create(PIPE_SHADER_GEOMETRY);
35601e04c3fSmrg   if (!ureg)
35701e04c3fSmrg      return NULL;
35801e04c3fSmrg
35901e04c3fSmrg   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES);
36001e04c3fSmrg   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP);
36101e04c3fSmrg   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 3);
36201e04c3fSmrg
36301e04c3fSmrg   out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
36401e04c3fSmrg   out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
36501e04c3fSmrg
36601e04c3fSmrg   in_pos = ureg_DECL_input(ureg, TGSI_SEMANTIC_POSITION, 0, 0, 1);
36701e04c3fSmrg
36801e04c3fSmrg   imm = ureg_DECL_immediate_int(ureg, &zero, 1);
36901e04c3fSmrg
37001e04c3fSmrg   for (i = 0; i < 3; ++i) {
37101e04c3fSmrg      struct ureg_src in_pos_vertex = ureg_src_dimension(in_pos, i);
37201e04c3fSmrg
37301e04c3fSmrg      /* out_pos = in_pos[i] */
37401e04c3fSmrg      ureg_MOV(ureg, out_pos, in_pos_vertex);
37501e04c3fSmrg
37601e04c3fSmrg      /* out_layer.x = f2i(in_pos[i].z) */
37701e04c3fSmrg      ureg_F2I(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X),
37801e04c3fSmrg                     ureg_scalar(in_pos_vertex, TGSI_SWIZZLE_Z));
37901e04c3fSmrg
38001e04c3fSmrg      ureg_EMIT(ureg, ureg_scalar(imm, TGSI_SWIZZLE_X));
38101e04c3fSmrg   }
38201e04c3fSmrg
38301e04c3fSmrg   ureg_END(ureg);
38401e04c3fSmrg
38501e04c3fSmrg   return ureg_create_shader_and_destroy(ureg, st->pipe);
38601e04c3fSmrg}
38701e04c3fSmrg
388b9abf16eSmayastatic const struct glsl_type *
3897ec681f3Smrgsampler_type_for_target(enum pipe_texture_target target,
3907ec681f3Smrg                        enum st_pbo_conversion conv)
391b9abf16eSmaya{
392b9abf16eSmaya   bool is_array = target >= PIPE_TEXTURE_1D_ARRAY;
393b9abf16eSmaya   static const enum glsl_sampler_dim dim[] = {
394b9abf16eSmaya      [PIPE_BUFFER]             = GLSL_SAMPLER_DIM_BUF,
395b9abf16eSmaya      [PIPE_TEXTURE_1D]         = GLSL_SAMPLER_DIM_1D,
396b9abf16eSmaya      [PIPE_TEXTURE_2D]         = GLSL_SAMPLER_DIM_2D,
397b9abf16eSmaya      [PIPE_TEXTURE_3D]         = GLSL_SAMPLER_DIM_3D,
398b9abf16eSmaya      [PIPE_TEXTURE_CUBE]       = GLSL_SAMPLER_DIM_CUBE,
399b9abf16eSmaya      [PIPE_TEXTURE_RECT]       = GLSL_SAMPLER_DIM_RECT,
400b9abf16eSmaya      [PIPE_TEXTURE_1D_ARRAY]   = GLSL_SAMPLER_DIM_1D,
401b9abf16eSmaya      [PIPE_TEXTURE_2D_ARRAY]   = GLSL_SAMPLER_DIM_2D,
402b9abf16eSmaya      [PIPE_TEXTURE_CUBE_ARRAY] = GLSL_SAMPLER_DIM_CUBE,
403b9abf16eSmaya   };
404b9abf16eSmaya
4057ec681f3Smrg   static const enum glsl_base_type type[] = {
4067ec681f3Smrg      [ST_PBO_CONVERT_FLOAT] = GLSL_TYPE_FLOAT,
4077ec681f3Smrg      [ST_PBO_CONVERT_UINT] = GLSL_TYPE_UINT,
4087ec681f3Smrg      [ST_PBO_CONVERT_UINT_TO_SINT] = GLSL_TYPE_UINT,
4097ec681f3Smrg      [ST_PBO_CONVERT_SINT] = GLSL_TYPE_INT,
4107ec681f3Smrg      [ST_PBO_CONVERT_SINT_TO_UINT] = GLSL_TYPE_INT,
4117ec681f3Smrg   };
4127ec681f3Smrg
4137ec681f3Smrg   return glsl_sampler_type(dim[target], false, is_array, type[conv]);
414b9abf16eSmaya}
415b9abf16eSmaya
4167ec681f3Smrg
/**
 * Build the NIR fragment shader for one PBO transfer variant.
 *
 * Download variant (download == true): fetches the texel at the fragment's
 * integer coordinates from the texture "tex" and stores the result into the
 * buffer image "img" at the computed PBO address.
 *
 * Upload variant (download == false): fetches from "tex" at the computed PBO
 * address and writes the result to the color output.
 *
 * \param st          state tracker context
 * \param download    true to build the download shader, false for upload
 * \param target      texture target used to pick the sampler type of "tex"
 * \param conversion  selects the sampler/image base types and the clamp
 *                    applied to the fetched value
 * \param need_layer  when a layer index is used, read it from
 *                    VARYING_SLOT_LAYER (true) or use constant zero (false)
 * \return the result of st_nir_finish_builtin_shader() for the built shader
 */
41701e04c3fSmrgstatic void *
4187ec681f3Smrgcreate_fs(struct st_context *st, bool download,
4197ec681f3Smrg          enum pipe_texture_target target,
4207ec681f3Smrg          enum st_pbo_conversion conversion,
4217ec681f3Smrg          bool need_layer)
422b9abf16eSmaya{
4237ec681f3Smrg   struct pipe_screen *screen = st->screen;
424b9abf16eSmaya   const nir_shader_compiler_options *options =
4257ec681f3Smrg      st_get_nir_compiler_options(st, MESA_SHADER_FRAGMENT);
   /* Decides below whether gl_FragCoord is declared as a system value or as
    * a regular shader input.
    */
426b9abf16eSmaya   bool pos_is_sysval =
427b9abf16eSmaya      screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);
428b9abf16eSmaya
4297ec681f3Smrg   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
4307ec681f3Smrg                                                  download ?
4317ec681f3Smrg                                                  "st/pbo download FS" :
4327ec681f3Smrg                                                  "st/pbo upload FS");
433b9abf16eSmaya
   /* Shared integer zero, reused for the default layer, the SINT_TO_UINT
    * clamp and the image store operands.
    */
434b9abf16eSmaya   nir_ssa_def *zero = nir_imm_int(&b, 0);
435b9abf16eSmaya
   /* NOTE(review): the fourth component is named image_size where the
    * constants are filled in (st_pbo_addresses_setup); this presumes the
    * constant buffer layout follows that order -- confirm in st_pbo.h.
    */
436b9abf16eSmaya   /* param = [ -xoffset + skip_pixels, -yoffset, stride, image_size ] */
437b9abf16eSmaya   nir_variable *param_var =
438b9abf16eSmaya      nir_variable_create(b.shader, nir_var_uniform, glsl_vec4_type(), "param");
   /* NOTE(review): presumably accounts for the four components of param --
    * confirm the unit of num_uniforms.
    */
439b9abf16eSmaya   b.shader->num_uniforms += 4;
440b9abf16eSmaya   nir_ssa_def *param = nir_load_var(&b, param_var);
441b9abf16eSmaya
442b9abf16eSmaya   nir_variable *fragcoord =
443b9abf16eSmaya      nir_variable_create(b.shader, pos_is_sysval ? nir_var_system_value :
444b9abf16eSmaya                          nir_var_shader_in, glsl_vec4_type(), "gl_FragCoord");
445b9abf16eSmaya   fragcoord->data.location = pos_is_sysval ? SYSTEM_VALUE_FRAG_COORD
446b9abf16eSmaya                                            : VARYING_SLOT_POS;
447b9abf16eSmaya   nir_ssa_def *coord = nir_load_var(&b, fragcoord);
448b9abf16eSmaya
   /* A layer index is only used when layers are supported and the shader is
    * either an upload shader or downloads from one of the targets listed
    * here.  It stays NULL otherwise, which disables the layer terms below.
    */
449b9abf16eSmaya   nir_ssa_def *layer = NULL;
450b9abf16eSmaya   if (st->pbo.layers && (!download || target == PIPE_TEXTURE_1D_ARRAY ||
451b9abf16eSmaya                                       target == PIPE_TEXTURE_2D_ARRAY ||
452b9abf16eSmaya                                       target == PIPE_TEXTURE_3D ||
453b9abf16eSmaya                                       target == PIPE_TEXTURE_CUBE ||
454b9abf16eSmaya                                       target == PIPE_TEXTURE_CUBE_ARRAY)) {
4557ec681f3Smrg      if (need_layer) {
4567ec681f3Smrg         nir_variable *var = nir_variable_create(b.shader, nir_var_shader_in,
4577ec681f3Smrg                                                glsl_int_type(), "gl_Layer");
4587ec681f3Smrg         var->data.location = VARYING_SLOT_LAYER;
4597ec681f3Smrg         var->data.interpolation = INTERP_MODE_FLAT;
4607ec681f3Smrg         layer = nir_load_var(&b, var);
4617ec681f3Smrg      }
4627ec681f3Smrg      else {
4637ec681f3Smrg         layer = zero;
4647ec681f3Smrg      }
465b9abf16eSmaya   }
466b9abf16eSmaya
467b9abf16eSmaya   /* offset_pos = param.xy + f2i(coord.xy) */
468b9abf16eSmaya   nir_ssa_def *offset_pos =
469b9abf16eSmaya      nir_iadd(&b, nir_channels(&b, param, TGSI_WRITEMASK_XY),
470b9abf16eSmaya               nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY)));
471b9abf16eSmaya
472b9abf16eSmaya   /* addr = offset_pos.x + offset_pos.y * stride */
473b9abf16eSmaya   nir_ssa_def *pbo_addr =
474b9abf16eSmaya      nir_iadd(&b, nir_channel(&b, offset_pos, 0),
475b9abf16eSmaya               nir_imul(&b, nir_channel(&b, offset_pos, 1),
476b9abf16eSmaya                        nir_channel(&b, param, 2)));
477b9abf16eSmaya   if (layer) {
478b9abf16eSmaya      /* pbo_addr += image_size * layer */
479b9abf16eSmaya      pbo_addr = nir_iadd(&b, pbo_addr,
480b9abf16eSmaya                          nir_imul(&b, layer, nir_channel(&b, param, 3)));
481b9abf16eSmaya   }
482b9abf16eSmaya
   /* Coordinate for the texel fetch: the integer fragment position (plus
    * layer) when downloading, the linear PBO address when uploading.
    */
483b9abf16eSmaya   nir_ssa_def *texcoord;
484b9abf16eSmaya   if (download) {
485b9abf16eSmaya      texcoord = nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY));
486b9abf16eSmaya
      /* 1D textures only take the x coordinate. */
4877ec681f3Smrg      if (target == PIPE_TEXTURE_1D) {
4887ec681f3Smrg         unsigned sw = 0;
4897ec681f3Smrg         texcoord = nir_swizzle(&b, texcoord, &sw, 1);
4907ec681f3Smrg      }
4917ec681f3Smrg
492b9abf16eSmaya      if (layer) {
493b9abf16eSmaya         nir_ssa_def *src_layer = layer;
494b9abf16eSmaya
         /* For 3D textures the source slice is the layer plus the
          * "layer_offset" uniform.
          */
495b9abf16eSmaya         if (target == PIPE_TEXTURE_3D) {
496b9abf16eSmaya            nir_variable *layer_offset_var =
497b9abf16eSmaya               nir_variable_create(b.shader, nir_var_uniform,
498b9abf16eSmaya                                   glsl_int_type(), "layer_offset");
499b9abf16eSmaya            b.shader->num_uniforms += 1;
            /* NOTE(review): presumably the slot right after the four param
             * components -- confirm against the constant buffer layout.
             */
500b9abf16eSmaya            layer_offset_var->data.driver_location = 4;
501b9abf16eSmaya            nir_ssa_def *layer_offset = nir_load_var(&b, layer_offset_var);
502b9abf16eSmaya
503b9abf16eSmaya            src_layer = nir_iadd(&b, layer, layer_offset);
504b9abf16eSmaya         }
505b9abf16eSmaya
         /* Append the layer as the last coordinate: (x, layer) for 1D
          * arrays, (x, y, layer) for everything else.
          */
5067ec681f3Smrg         if (target == PIPE_TEXTURE_1D_ARRAY) {
5077ec681f3Smrg            texcoord = nir_vec2(&b, nir_channel(&b, texcoord, 0),
5087ec681f3Smrg                                    src_layer);
5097ec681f3Smrg         } else {
5107ec681f3Smrg            texcoord = nir_vec3(&b, nir_channel(&b, texcoord, 0),
5117ec681f3Smrg                                    nir_channel(&b, texcoord, 1),
5127ec681f3Smrg                                    src_layer);
5137ec681f3Smrg         }
514b9abf16eSmaya      }
515b9abf16eSmaya   } else {
516b9abf16eSmaya      texcoord = pbo_addr;
517b9abf16eSmaya   }
518b9abf16eSmaya
   /* Source texture "tex", explicitly bound at binding 0. */
519b9abf16eSmaya   nir_variable *tex_var =
520b9abf16eSmaya      nir_variable_create(b.shader, nir_var_uniform,
5217ec681f3Smrg                          sampler_type_for_target(target, conversion),
5227ec681f3Smrg                          "tex");
523b9abf16eSmaya   tex_var->data.explicit_binding = true;
524b9abf16eSmaya   tex_var->data.binding = 0;
525b9abf16eSmaya
526b9abf16eSmaya   nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var);
527b9abf16eSmaya
   /* Texel fetch (txf) with three sources: texture deref, sampler deref and
    * the coordinate.  The same deref serves as both texture and sampler.
    */
528b9abf16eSmaya   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
529b9abf16eSmaya   tex->op = nir_texop_txf;
530b9abf16eSmaya   tex->sampler_dim = glsl_get_sampler_dim(tex_var->type);
531b9abf16eSmaya   tex->coord_components =
532b9abf16eSmaya      glsl_get_sampler_coordinate_components(tex_var->type);
5337ec681f3Smrg   tex->is_array = target >= PIPE_TEXTURE_1D_ARRAY;
5347ec681f3Smrg
5357ec681f3Smrg   tex->dest_type = nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(tex_var->type));
536b9abf16eSmaya   tex->src[0].src_type = nir_tex_src_texture_deref;
537b9abf16eSmaya   tex->src[0].src = nir_src_for_ssa(&tex_deref->dest.ssa);
538b9abf16eSmaya   tex->src[1].src_type = nir_tex_src_sampler_deref;
539b9abf16eSmaya   tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa);
540b9abf16eSmaya   tex->src[2].src_type = nir_tex_src_coord;
541b9abf16eSmaya   tex->src[2].src = nir_src_for_ssa(texcoord);
542b9abf16eSmaya   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
543b9abf16eSmaya   nir_builder_instr_insert(&b, &tex->instr);
544b9abf16eSmaya   nir_ssa_def *result = &tex->dest.ssa;
545b9abf16eSmaya
   /* Clamp when changing signedness: negative signed values become 0, and
    * unsigned values are limited to 2^31 - 1.
    */
546b9abf16eSmaya   if (conversion == ST_PBO_CONVERT_SINT_TO_UINT)
547b9abf16eSmaya      result = nir_imax(&b, result, zero);
548b9abf16eSmaya   else if (conversion == ST_PBO_CONVERT_UINT_TO_SINT)
549b9abf16eSmaya      result = nir_umin(&b, result, nir_imm_int(&b, (1u << 31) - 1));
550b9abf16eSmaya
551b9abf16eSmaya   if (download) {
      /* Base type of the destination image, indexed by conversion: the
       * converting cases use the type they convert *to*.
       */
5527ec681f3Smrg      static const enum glsl_base_type type[] = {
5537ec681f3Smrg         [ST_PBO_CONVERT_FLOAT] = GLSL_TYPE_FLOAT,
5547ec681f3Smrg         [ST_PBO_CONVERT_UINT] = GLSL_TYPE_UINT,
5557ec681f3Smrg         [ST_PBO_CONVERT_UINT_TO_SINT] = GLSL_TYPE_INT,
5567ec681f3Smrg         [ST_PBO_CONVERT_SINT] = GLSL_TYPE_INT,
5577ec681f3Smrg         [ST_PBO_CONVERT_SINT_TO_UINT] = GLSL_TYPE_UINT,
5587ec681f3Smrg      };
      /* Destination buffer image "img", explicitly bound at binding 0 and
       * flagged ACCESS_NON_READABLE.
       */
559b9abf16eSmaya      nir_variable *img_var =
560b9abf16eSmaya         nir_variable_create(b.shader, nir_var_uniform,
561b9abf16eSmaya                             glsl_image_type(GLSL_SAMPLER_DIM_BUF, false,
5627ec681f3Smrg                                             type[conversion]), "img");
5637ec681f3Smrg      img_var->data.access = ACCESS_NON_READABLE;
564b9abf16eSmaya      img_var->data.explicit_binding = true;
565b9abf16eSmaya      img_var->data.binding = 0;
566b9abf16eSmaya      nir_deref_instr *img_deref = nir_build_deref_var(&b, img_var);
5677ec681f3Smrg
      /* Store the fetched value at element pbo_addr of the buffer image. */
5687ec681f3Smrg      nir_image_deref_store(&b, &img_deref->dest.ssa,
5697ec681f3Smrg                            nir_vec4(&b, pbo_addr, zero, zero, zero),
5707ec681f3Smrg                            zero,
5717ec681f3Smrg                            result,
5727ec681f3Smrg                            nir_imm_int(&b, 0),
5737ec681f3Smrg                            .image_dim = GLSL_SAMPLER_DIM_BUF);
574b9abf16eSmaya   } else {
      /* Upload: the fetched value becomes the fragment color. */
575b9abf16eSmaya      nir_variable *color =
576b9abf16eSmaya         nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
577b9abf16eSmaya                             "gl_FragColor");
578b9abf16eSmaya      color->data.location = FRAG_RESULT_COLOR;
579b9abf16eSmaya
580b9abf16eSmaya      nir_store_var(&b, color, result, TGSI_WRITEMASK_XYZW);
581b9abf16eSmaya   }
582b9abf16eSmaya
5837ec681f3Smrg   return st_nir_finish_builtin_shader(st, b.shader);
584b9abf16eSmaya}
585b9abf16eSmaya
58601e04c3fSmrgstatic enum st_pbo_conversion
58701e04c3fSmrgget_pbo_conversion(enum pipe_format src_format, enum pipe_format dst_format)
58801e04c3fSmrg{
58901e04c3fSmrg   if (util_format_is_pure_uint(src_format)) {
5907ec681f3Smrg      if (util_format_is_pure_uint(dst_format))
5917ec681f3Smrg         return ST_PBO_CONVERT_UINT;
59201e04c3fSmrg      if (util_format_is_pure_sint(dst_format))
59301e04c3fSmrg         return ST_PBO_CONVERT_UINT_TO_SINT;
59401e04c3fSmrg   } else if (util_format_is_pure_sint(src_format)) {
5957ec681f3Smrg      if (util_format_is_pure_sint(dst_format))
5967ec681f3Smrg         return ST_PBO_CONVERT_SINT;
59701e04c3fSmrg      if (util_format_is_pure_uint(dst_format))
59801e04c3fSmrg         return ST_PBO_CONVERT_SINT_TO_UINT;
59901e04c3fSmrg   }
60001e04c3fSmrg
6017ec681f3Smrg   return ST_PBO_CONVERT_FLOAT;
60201e04c3fSmrg}
60301e04c3fSmrg
60401e04c3fSmrgvoid *
60501e04c3fSmrgst_pbo_get_upload_fs(struct st_context *st,
60601e04c3fSmrg                     enum pipe_format src_format,
6077ec681f3Smrg                     enum pipe_format dst_format,
6087ec681f3Smrg                     bool need_layer)
60901e04c3fSmrg{
61001e04c3fSmrg   STATIC_ASSERT(ARRAY_SIZE(st->pbo.upload_fs) == ST_NUM_PBO_CONVERSIONS);
61101e04c3fSmrg
61201e04c3fSmrg   enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format);
61301e04c3fSmrg
6147ec681f3Smrg   if (!st->pbo.upload_fs[conversion][need_layer])
6157ec681f3Smrg      st->pbo.upload_fs[conversion][need_layer] = create_fs(st, false, 0, conversion, need_layer);
61601e04c3fSmrg
6177ec681f3Smrg   return st->pbo.upload_fs[conversion][need_layer];
61801e04c3fSmrg}
61901e04c3fSmrg
62001e04c3fSmrgvoid *
62101e04c3fSmrgst_pbo_get_download_fs(struct st_context *st, enum pipe_texture_target target,
62201e04c3fSmrg                       enum pipe_format src_format,
6237ec681f3Smrg                       enum pipe_format dst_format,
6247ec681f3Smrg                       bool need_layer)
62501e04c3fSmrg{
62601e04c3fSmrg   STATIC_ASSERT(ARRAY_SIZE(st->pbo.download_fs) == ST_NUM_PBO_CONVERSIONS);
62701e04c3fSmrg   assert(target < PIPE_MAX_TEXTURE_TYPES);
62801e04c3fSmrg
62901e04c3fSmrg   enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format);
63001e04c3fSmrg
6317ec681f3Smrg   if (!st->pbo.download_fs[conversion][target][need_layer])
6327ec681f3Smrg      st->pbo.download_fs[conversion][target][need_layer] = create_fs(st, true, target, conversion, need_layer);
63301e04c3fSmrg
6347ec681f3Smrg   return st->pbo.download_fs[conversion][target][need_layer];
63501e04c3fSmrg}
63601e04c3fSmrg
63701e04c3fSmrgvoid
63801e04c3fSmrgst_init_pbo_helpers(struct st_context *st)
63901e04c3fSmrg{
6407ec681f3Smrg   struct pipe_screen *screen = st->screen;
64101e04c3fSmrg
64201e04c3fSmrg   st->pbo.upload_enabled =
64301e04c3fSmrg      screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) &&
64401e04c3fSmrg      screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) >= 1 &&
64501e04c3fSmrg      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS);
64601e04c3fSmrg   if (!st->pbo.upload_enabled)
64701e04c3fSmrg      return;
64801e04c3fSmrg
64901e04c3fSmrg   st->pbo.download_enabled =
65001e04c3fSmrg      st->pbo.upload_enabled &&
65101e04c3fSmrg      screen->get_param(screen, PIPE_CAP_SAMPLER_VIEW_TARGET) &&
65201e04c3fSmrg      screen->get_param(screen, PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT) &&
65301e04c3fSmrg      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
65401e04c3fSmrg                                       PIPE_SHADER_CAP_MAX_SHADER_IMAGES) >= 1;
65501e04c3fSmrg
65601e04c3fSmrg   st->pbo.rgba_only =
65701e04c3fSmrg      screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY);
65801e04c3fSmrg
65901e04c3fSmrg   if (screen->get_param(screen, PIPE_CAP_TGSI_INSTANCEID)) {
66001e04c3fSmrg      if (screen->get_param(screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) {
66101e04c3fSmrg         st->pbo.layers = true;
66201e04c3fSmrg      } else if (screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES) >= 3) {
66301e04c3fSmrg         st->pbo.layers = true;
66401e04c3fSmrg         st->pbo.use_gs = true;
66501e04c3fSmrg      }
66601e04c3fSmrg   }
66701e04c3fSmrg
66801e04c3fSmrg   /* Blend state */
66901e04c3fSmrg   memset(&st->pbo.upload_blend, 0, sizeof(struct pipe_blend_state));
67001e04c3fSmrg   st->pbo.upload_blend.rt[0].colormask = PIPE_MASK_RGBA;
67101e04c3fSmrg
67201e04c3fSmrg   /* Rasterizer state */
67301e04c3fSmrg   memset(&st->pbo.raster, 0, sizeof(struct pipe_rasterizer_state));
67401e04c3fSmrg   st->pbo.raster.half_pixel_center = 1;
67501e04c3fSmrg}
67601e04c3fSmrg
67701e04c3fSmrgvoid
67801e04c3fSmrgst_destroy_pbo_helpers(struct st_context *st)
67901e04c3fSmrg{
68001e04c3fSmrg   unsigned i;
68101e04c3fSmrg
68201e04c3fSmrg   for (i = 0; i < ARRAY_SIZE(st->pbo.upload_fs); ++i) {
6837ec681f3Smrg      for (unsigned j = 0; j < ARRAY_SIZE(st->pbo.upload_fs[0]); j++) {
6847ec681f3Smrg         if (st->pbo.upload_fs[i][j]) {
6857ec681f3Smrg            st->pipe->delete_fs_state(st->pipe, st->pbo.upload_fs[i][j]);
6867ec681f3Smrg            st->pbo.upload_fs[i][j] = NULL;
6877ec681f3Smrg         }
68801e04c3fSmrg      }
68901e04c3fSmrg   }
69001e04c3fSmrg
69101e04c3fSmrg   for (i = 0; i < ARRAY_SIZE(st->pbo.download_fs); ++i) {
69201e04c3fSmrg      for (unsigned j = 0; j < ARRAY_SIZE(st->pbo.download_fs[0]); ++j) {
6937ec681f3Smrg         for (unsigned k = 0; k < ARRAY_SIZE(st->pbo.download_fs[0][0]); k++) {
6947ec681f3Smrg            if (st->pbo.download_fs[i][j][k]) {
6957ec681f3Smrg               st->pipe->delete_fs_state(st->pipe, st->pbo.download_fs[i][j][k]);
6967ec681f3Smrg               st->pbo.download_fs[i][j][k] = NULL;
6977ec681f3Smrg            }
69801e04c3fSmrg         }
69901e04c3fSmrg      }
70001e04c3fSmrg   }
70101e04c3fSmrg
70201e04c3fSmrg   if (st->pbo.gs) {
7037ec681f3Smrg      st->pipe->delete_gs_state(st->pipe, st->pbo.gs);
70401e04c3fSmrg      st->pbo.gs = NULL;
70501e04c3fSmrg   }
70601e04c3fSmrg
70701e04c3fSmrg   if (st->pbo.vs) {
7087ec681f3Smrg      st->pipe->delete_vs_state(st->pipe, st->pbo.vs);
70901e04c3fSmrg      st->pbo.vs = NULL;
71001e04c3fSmrg   }
71101e04c3fSmrg}
712