1/*
2 * Copyright 2007 VMware, Inc.
3 * Copyright 2016 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25/**
26 * \file
27 *
28 * Common helper functions for PBO up- and downloads.
29 */
30
31#include "state_tracker/st_context.h"
32#include "state_tracker/st_nir.h"
33#include "state_tracker/st_pbo.h"
34#include "state_tracker/st_cb_bufferobjects.h"
35
36#include "pipe/p_context.h"
37#include "pipe/p_defines.h"
38#include "pipe/p_screen.h"
39#include "cso_cache/cso_context.h"
40#include "tgsi/tgsi_ureg.h"
41#include "util/u_format.h"
42#include "util/u_inlines.h"
43#include "util/u_upload_mgr.h"
44
45#include "compiler/nir/nir_builder.h"
46
/* Integer conversion the PBO fragment shader applies to a fetched texel.
 *
 * The values are used as indices into the per-conversion shader arrays, so
 * they must stay dense and ST_NUM_PBO_CONVERSIONS must remain the last entry.
 */
enum st_pbo_conversion {
   /* Pass the fetched value through unchanged. */
   ST_PBO_CONVERT_NONE = 0,
   /* Unsigned source, signed destination. */
   ST_PBO_CONVERT_UINT_TO_SINT = 1,
   /* Signed source, unsigned destination. */
   ST_PBO_CONVERT_SINT_TO_UINT = 2,

   /* Number of conversions listed above. */
   ST_NUM_PBO_CONVERSIONS
};
55
56/* Final setup of buffer addressing information.
57 *
58 * buf_offset is in pixels.
59 *
60 * Returns false if something (e.g. alignment) prevents PBO upload/download.
61 */
62bool
63st_pbo_addresses_setup(struct st_context *st,
64                       struct pipe_resource *buf, intptr_t buf_offset,
65                       struct st_pbo_addresses *addr)
66{
67   unsigned skip_pixels;
68
69   /* Check alignment against texture buffer requirements. */
70   {
71      unsigned ofs = (buf_offset * addr->bytes_per_pixel) % st->ctx->Const.TextureBufferOffsetAlignment;
72      if (ofs != 0) {
73         if (ofs % addr->bytes_per_pixel != 0)
74            return false;
75
76         skip_pixels = ofs / addr->bytes_per_pixel;
77         buf_offset -= skip_pixels;
78      } else {
79         skip_pixels = 0;
80      }
81   }
82
83   assert(buf_offset >= 0);
84
85   addr->buffer = buf;
86   addr->first_element = buf_offset;
87   addr->last_element = buf_offset + skip_pixels + addr->width - 1
88         + (addr->height - 1 + (addr->depth - 1) * addr->image_height) * addr->pixels_per_row;
89
90   if (addr->last_element - addr->first_element > st->ctx->Const.MaxTextureBufferSize - 1)
91      return false;
92
93   /* This should be ensured by Mesa before calling our callbacks */
94   assert((addr->last_element + 1) * addr->bytes_per_pixel <= buf->width0);
95
96   addr->constants.xoffset = -addr->xoffset + skip_pixels;
97   addr->constants.yoffset = -addr->yoffset;
98   addr->constants.stride = addr->pixels_per_row;
99   addr->constants.image_size = addr->pixels_per_row * addr->image_height;
100   addr->constants.layer_offset = 0;
101
102   return true;
103}
104
105/* Validate and fill buffer addressing information based on GL pixelstore
106 * attributes.
107 *
108 * Returns false if some aspect of the addressing (e.g. alignment) prevents
109 * PBO upload/download.
110 */
111bool
112st_pbo_addresses_pixelstore(struct st_context *st,
113                            GLenum gl_target, bool skip_images,
114                            const struct gl_pixelstore_attrib *store,
115                            const void *pixels,
116                            struct st_pbo_addresses *addr)
117{
118   struct pipe_resource *buf = st_buffer_object(store->BufferObj)->buffer;
119   intptr_t buf_offset = (intptr_t) pixels;
120
121   if (buf_offset % addr->bytes_per_pixel)
122      return false;
123
124   /* Convert to texels */
125   buf_offset = buf_offset / addr->bytes_per_pixel;
126
127   /* Determine image height */
128   if (gl_target == GL_TEXTURE_1D_ARRAY) {
129      addr->image_height = 1;
130   } else {
131      addr->image_height = store->ImageHeight > 0 ? store->ImageHeight : addr->height;
132   }
133
134   /* Compute the stride, taking store->Alignment into account */
135   {
136       unsigned pixels_per_row = store->RowLength > 0 ?
137                           store->RowLength : addr->width;
138       unsigned bytes_per_row = pixels_per_row * addr->bytes_per_pixel;
139       unsigned remainder = bytes_per_row % store->Alignment;
140       unsigned offset_rows;
141
142       if (remainder > 0)
143          bytes_per_row += store->Alignment - remainder;
144
145       if (bytes_per_row % addr->bytes_per_pixel)
146          return false;
147
148       addr->pixels_per_row = bytes_per_row / addr->bytes_per_pixel;
149
150       offset_rows = store->SkipRows;
151       if (skip_images)
152          offset_rows += addr->image_height * store->SkipImages;
153
154       buf_offset += store->SkipPixels + addr->pixels_per_row * offset_rows;
155   }
156
157   if (!st_pbo_addresses_setup(st, buf, buf_offset, addr))
158      return false;
159
160   /* Support GL_PACK_INVERT_MESA */
161   if (store->Invert) {
162      addr->constants.xoffset += (addr->height - 1) * addr->constants.stride;
163      addr->constants.stride = -addr->constants.stride;
164   }
165
166   return true;
167}
168
169/* For download from a framebuffer, we may have to invert the Y axis. The
170 * setup is as follows:
171 * - set viewport to inverted, so that the position sysval is correct for
172 *   texel fetches
173 * - this function adjusts the fragment shader's constant buffer to compute
174 *   the correct destination addresses.
175 */
176void
177st_pbo_addresses_invert_y(struct st_pbo_addresses *addr,
178                          unsigned viewport_height)
179{
180   addr->constants.xoffset +=
181      (viewport_height - 1 + 2 * addr->constants.yoffset) * addr->constants.stride;
182   addr->constants.stride = -addr->constants.stride;
183}
184
185/* Setup all vertex pipeline state, rasterizer state, and fragment shader
186 * constants, and issue the draw call for PBO upload/download.
187 *
188 * The caller is responsible for saving and restoring state, as well as for
189 * setting other fragment shader state (fragment shader, samplers), and
190 * framebuffer/viewport/DSA/blend state.
191 */
192bool
193st_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr,
194            unsigned surface_width, unsigned surface_height)
195{
196   struct cso_context *cso = st->cso_context;
197
198   /* Setup vertex and geometry shaders */
199   if (!st->pbo.vs) {
200      st->pbo.vs = st_pbo_create_vs(st);
201      if (!st->pbo.vs)
202         return false;
203   }
204
205   if (addr->depth != 1 && st->pbo.use_gs && !st->pbo.gs) {
206      st->pbo.gs = st_pbo_create_gs(st);
207      if (!st->pbo.gs)
208         return false;
209   }
210
211   cso_set_vertex_shader_handle(cso, st->pbo.vs);
212
213   cso_set_geometry_shader_handle(cso, addr->depth != 1 ? st->pbo.gs : NULL);
214
215   cso_set_tessctrl_shader_handle(cso, NULL);
216
217   cso_set_tesseval_shader_handle(cso, NULL);
218
219   /* Upload vertices */
220   {
221      struct pipe_vertex_buffer vbo = {0};
222      struct pipe_vertex_element velem;
223
224      float x0 = (float) addr->xoffset / surface_width * 2.0f - 1.0f;
225      float y0 = (float) addr->yoffset / surface_height * 2.0f - 1.0f;
226      float x1 = (float) (addr->xoffset + addr->width) / surface_width * 2.0f - 1.0f;
227      float y1 = (float) (addr->yoffset + addr->height) / surface_height * 2.0f - 1.0f;
228
229      float *verts = NULL;
230
231      vbo.stride = 2 * sizeof(float);
232
233      u_upload_alloc(st->pipe->stream_uploader, 0, 8 * sizeof(float), 4,
234                     &vbo.buffer_offset, &vbo.buffer.resource, (void **) &verts);
235      if (!verts)
236         return false;
237
238      verts[0] = x0;
239      verts[1] = y0;
240      verts[2] = x0;
241      verts[3] = y1;
242      verts[4] = x1;
243      verts[5] = y0;
244      verts[6] = x1;
245      verts[7] = y1;
246
247      u_upload_unmap(st->pipe->stream_uploader);
248
249      velem.src_offset = 0;
250      velem.instance_divisor = 0;
251      velem.vertex_buffer_index = 0;
252      velem.src_format = PIPE_FORMAT_R32G32_FLOAT;
253
254      cso_set_vertex_elements(cso, 1, &velem);
255
256      cso_set_vertex_buffers(cso, velem.vertex_buffer_index, 1, &vbo);
257
258      pipe_resource_reference(&vbo.buffer.resource, NULL);
259   }
260
261   /* Upload constants */
262   {
263      struct pipe_constant_buffer cb;
264
265      cb.buffer = NULL;
266      cb.user_buffer = &addr->constants;
267      cb.buffer_offset = 0;
268      cb.buffer_size = sizeof(addr->constants);
269
270      cso_set_constant_buffer(cso, PIPE_SHADER_FRAGMENT, 0, &cb);
271
272      pipe_resource_reference(&cb.buffer, NULL);
273   }
274
275   /* Rasterizer state */
276   cso_set_rasterizer(cso, &st->pbo.raster);
277
278   /* Disable stream output */
279   cso_set_stream_outputs(cso, 0, NULL, 0);
280
281   if (addr->depth == 1) {
282      cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
283   } else {
284      cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP,
285                                0, 4, 0, addr->depth);
286   }
287
288   return true;
289}
290
291void *
292st_pbo_create_vs(struct st_context *st)
293{
294   struct pipe_screen *pscreen = st->pipe->screen;
295   bool use_nir = PIPE_SHADER_IR_NIR ==
296      pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX,
297                                PIPE_SHADER_CAP_PREFERRED_IR);
298
299   if (use_nir) {
300      unsigned inputs[] =  {  VERT_ATTRIB_POS, SYSTEM_VALUE_INSTANCE_ID, };
301      unsigned outputs[] = { VARYING_SLOT_POS,       VARYING_SLOT_LAYER  };
302
303      return st_nir_make_passthrough_shader(st, "st/pbo VS",
304                                            MESA_SHADER_VERTEX,
305                                            st->pbo.layers ? 2 : 1,
306                                            inputs, outputs, NULL, (1 << 1));
307   }
308
309   struct ureg_program *ureg;
310   struct ureg_src in_pos;
311   struct ureg_src in_instanceid;
312   struct ureg_dst out_pos;
313   struct ureg_dst out_layer;
314
315   ureg = ureg_create(PIPE_SHADER_VERTEX);
316   if (!ureg)
317      return NULL;
318
319   in_pos = ureg_DECL_vs_input(ureg, TGSI_SEMANTIC_POSITION);
320
321   out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
322
323   if (st->pbo.layers) {
324      in_instanceid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0);
325
326      if (!st->pbo.use_gs)
327         out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
328   }
329
330   /* out_pos = in_pos */
331   ureg_MOV(ureg, out_pos, in_pos);
332
333   if (st->pbo.layers) {
334      if (st->pbo.use_gs) {
335         /* out_pos.z = i2f(gl_InstanceID) */
336         ureg_I2F(ureg, ureg_writemask(out_pos, TGSI_WRITEMASK_Z),
337                        ureg_scalar(in_instanceid, TGSI_SWIZZLE_X));
338      } else {
339         /* out_layer = gl_InstanceID */
340         ureg_MOV(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X),
341                        ureg_scalar(in_instanceid, TGSI_SWIZZLE_X));
342      }
343   }
344
345   ureg_END(ureg);
346
347   return ureg_create_shader_and_destroy(ureg, st->pipe);
348}
349
350void *
351st_pbo_create_gs(struct st_context *st)
352{
353   static const int zero = 0;
354   struct ureg_program *ureg;
355   struct ureg_dst out_pos;
356   struct ureg_dst out_layer;
357   struct ureg_src in_pos;
358   struct ureg_src imm;
359   unsigned i;
360
361   ureg = ureg_create(PIPE_SHADER_GEOMETRY);
362   if (!ureg)
363      return NULL;
364
365   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES);
366   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP);
367   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 3);
368
369   out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
370   out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
371
372   in_pos = ureg_DECL_input(ureg, TGSI_SEMANTIC_POSITION, 0, 0, 1);
373
374   imm = ureg_DECL_immediate_int(ureg, &zero, 1);
375
376   for (i = 0; i < 3; ++i) {
377      struct ureg_src in_pos_vertex = ureg_src_dimension(in_pos, i);
378
379      /* out_pos = in_pos[i] */
380      ureg_MOV(ureg, out_pos, in_pos_vertex);
381
382      /* out_layer.x = f2i(in_pos[i].z) */
383      ureg_F2I(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X),
384                     ureg_scalar(in_pos_vertex, TGSI_SWIZZLE_Z));
385
386      ureg_EMIT(ureg, ureg_scalar(imm, TGSI_SWIZZLE_X));
387   }
388
389   ureg_END(ureg);
390
391   return ureg_create_shader_and_destroy(ureg, st->pipe);
392}
393
394static void
395build_conversion(struct ureg_program *ureg, const struct ureg_dst *temp,
396                 enum st_pbo_conversion conversion)
397{
398   switch (conversion) {
399   case ST_PBO_CONVERT_SINT_TO_UINT:
400      ureg_IMAX(ureg, *temp, ureg_src(*temp), ureg_imm1i(ureg, 0));
401      break;
402   case ST_PBO_CONVERT_UINT_TO_SINT:
403      ureg_UMIN(ureg, *temp, ureg_src(*temp), ureg_imm1u(ureg, (1u << 31) - 1));
404      break;
405   default:
406      /* no-op */
407      break;
408   }
409}
410
411static const struct glsl_type *
412sampler_type_for_target(enum pipe_texture_target target)
413{
414   bool is_array = target >= PIPE_TEXTURE_1D_ARRAY;
415   static const enum glsl_sampler_dim dim[] = {
416      [PIPE_BUFFER]             = GLSL_SAMPLER_DIM_BUF,
417      [PIPE_TEXTURE_1D]         = GLSL_SAMPLER_DIM_1D,
418      [PIPE_TEXTURE_2D]         = GLSL_SAMPLER_DIM_2D,
419      [PIPE_TEXTURE_3D]         = GLSL_SAMPLER_DIM_3D,
420      [PIPE_TEXTURE_CUBE]       = GLSL_SAMPLER_DIM_CUBE,
421      [PIPE_TEXTURE_RECT]       = GLSL_SAMPLER_DIM_RECT,
422      [PIPE_TEXTURE_1D_ARRAY]   = GLSL_SAMPLER_DIM_1D,
423      [PIPE_TEXTURE_2D_ARRAY]   = GLSL_SAMPLER_DIM_2D,
424      [PIPE_TEXTURE_CUBE_ARRAY] = GLSL_SAMPLER_DIM_CUBE,
425   };
426
427   return glsl_sampler_type(dim[target], false, is_array, GLSL_TYPE_FLOAT);
428}
429
430static void *
431create_fs_nir(struct st_context *st,
432              bool download,
433              enum pipe_texture_target target,
434              enum st_pbo_conversion conversion)
435{
436   struct pipe_screen *screen = st->pipe->screen;
437   struct nir_builder b;
438   const nir_shader_compiler_options *options =
439      st->ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions;
440   bool pos_is_sysval =
441      screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);
442
443   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
444
445   nir_ssa_def *zero = nir_imm_int(&b, 0);
446
447   /* param = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */
448   nir_variable *param_var =
449      nir_variable_create(b.shader, nir_var_uniform, glsl_vec4_type(), "param");
450   b.shader->num_uniforms += 4;
451   nir_ssa_def *param = nir_load_var(&b, param_var);
452
453   nir_variable *fragcoord =
454      nir_variable_create(b.shader, pos_is_sysval ? nir_var_system_value :
455                          nir_var_shader_in, glsl_vec4_type(), "gl_FragCoord");
456   fragcoord->data.location = pos_is_sysval ? SYSTEM_VALUE_FRAG_COORD
457                                            : VARYING_SLOT_POS;
458   nir_ssa_def *coord = nir_load_var(&b, fragcoord);
459
460   nir_ssa_def *layer = NULL;
461   if (st->pbo.layers && (!download || target == PIPE_TEXTURE_1D_ARRAY ||
462                                       target == PIPE_TEXTURE_2D_ARRAY ||
463                                       target == PIPE_TEXTURE_3D ||
464                                       target == PIPE_TEXTURE_CUBE ||
465                                       target == PIPE_TEXTURE_CUBE_ARRAY)) {
466      nir_variable *var = nir_variable_create(b.shader, nir_var_shader_in,
467                                              glsl_int_type(), "gl_Layer");
468      var->data.location = VARYING_SLOT_LAYER;
469      var->data.interpolation = INTERP_MODE_FLAT;
470      layer = nir_load_var(&b, var);
471   }
472
473   /* offset_pos = param.xy + f2i(coord.xy) */
474   nir_ssa_def *offset_pos =
475      nir_iadd(&b, nir_channels(&b, param, TGSI_WRITEMASK_XY),
476               nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY)));
477
478   /* addr = offset_pos.x + offset_pos.y * stride */
479   nir_ssa_def *pbo_addr =
480      nir_iadd(&b, nir_channel(&b, offset_pos, 0),
481               nir_imul(&b, nir_channel(&b, offset_pos, 1),
482                        nir_channel(&b, param, 2)));
483   if (layer) {
484      /* pbo_addr += image_height * layer */
485      pbo_addr = nir_iadd(&b, pbo_addr,
486                          nir_imul(&b, layer, nir_channel(&b, param, 3)));
487   }
488
489   nir_ssa_def *texcoord;
490   if (download) {
491      texcoord = nir_f2i32(&b, nir_channels(&b, coord, TGSI_WRITEMASK_XY));
492
493      if (layer) {
494         nir_ssa_def *src_layer = layer;
495
496         if (target == PIPE_TEXTURE_3D) {
497            nir_variable *layer_offset_var =
498               nir_variable_create(b.shader, nir_var_uniform,
499                                   glsl_int_type(), "layer_offset");
500            b.shader->num_uniforms += 1;
501            layer_offset_var->data.driver_location = 4;
502            nir_ssa_def *layer_offset = nir_load_var(&b, layer_offset_var);
503
504            src_layer = nir_iadd(&b, layer, layer_offset);
505         }
506
507         texcoord = nir_vec3(&b, nir_channel(&b, texcoord, 0),
508                                 nir_channel(&b, texcoord, 1),
509                                 src_layer);
510      }
511   } else {
512      texcoord = pbo_addr;
513   }
514
515   nir_variable *tex_var =
516      nir_variable_create(b.shader, nir_var_uniform,
517                          sampler_type_for_target(target), "tex");
518   tex_var->data.explicit_binding = true;
519   tex_var->data.binding = 0;
520
521   nir_deref_instr *tex_deref = nir_build_deref_var(&b, tex_var);
522
523   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
524   tex->op = nir_texop_txf;
525   tex->sampler_dim = glsl_get_sampler_dim(tex_var->type);
526   tex->coord_components =
527      glsl_get_sampler_coordinate_components(tex_var->type);
528   tex->dest_type = nir_type_float;
529   tex->src[0].src_type = nir_tex_src_texture_deref;
530   tex->src[0].src = nir_src_for_ssa(&tex_deref->dest.ssa);
531   tex->src[1].src_type = nir_tex_src_sampler_deref;
532   tex->src[1].src = nir_src_for_ssa(&tex_deref->dest.ssa);
533   tex->src[2].src_type = nir_tex_src_coord;
534   tex->src[2].src = nir_src_for_ssa(texcoord);
535   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
536   nir_builder_instr_insert(&b, &tex->instr);
537   nir_ssa_def *result = &tex->dest.ssa;
538
539   if (conversion == ST_PBO_CONVERT_SINT_TO_UINT)
540      result = nir_imax(&b, result, zero);
541   else if (conversion == ST_PBO_CONVERT_UINT_TO_SINT)
542      result = nir_umin(&b, result, nir_imm_int(&b, (1u << 31) - 1));
543
544   if (download) {
545      nir_variable *img_var =
546         nir_variable_create(b.shader, nir_var_uniform,
547                             glsl_image_type(GLSL_SAMPLER_DIM_BUF, false,
548                                             GLSL_TYPE_FLOAT), "img");
549      img_var->data.image.access = ACCESS_NON_READABLE;
550      img_var->data.explicit_binding = true;
551      img_var->data.binding = 0;
552      nir_deref_instr *img_deref = nir_build_deref_var(&b, img_var);
553      nir_intrinsic_instr *intrin =
554         nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
555      intrin->src[0] = nir_src_for_ssa(&img_deref->dest.ssa);
556      intrin->src[1] =
557         nir_src_for_ssa(nir_vec4(&b, pbo_addr, zero, zero, zero));
558      intrin->src[2] = nir_src_for_ssa(zero);
559      intrin->src[3] = nir_src_for_ssa(result);
560      intrin->num_components = 4;
561      nir_builder_instr_insert(&b, &intrin->instr);
562   } else {
563      nir_variable *color =
564         nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
565                             "gl_FragColor");
566      color->data.location = FRAG_RESULT_COLOR;
567
568      nir_store_var(&b, color, result, TGSI_WRITEMASK_XYZW);
569   }
570
571   return st_nir_finish_builtin_shader(st, b.shader, download ?
572                                       "st/pbo download FS" :
573                                       "st/pbo upload FS");
574}
575
576static void *
577create_fs_tgsi(struct st_context *st, bool download,
578               enum pipe_texture_target target,
579               enum st_pbo_conversion conversion)
580{
581   struct pipe_context *pipe = st->pipe;
582   struct pipe_screen *screen = pipe->screen;
583   struct ureg_program *ureg;
584   bool have_layer;
585   struct ureg_dst out;
586   struct ureg_src sampler;
587   struct ureg_src pos;
588   struct ureg_src layer;
589   struct ureg_src const0;
590   struct ureg_src const1;
591   struct ureg_dst temp0;
592
593   have_layer =
594      st->pbo.layers &&
595      (!download || target == PIPE_TEXTURE_1D_ARRAY
596                 || target == PIPE_TEXTURE_2D_ARRAY
597                 || target == PIPE_TEXTURE_3D
598                 || target == PIPE_TEXTURE_CUBE
599                 || target == PIPE_TEXTURE_CUBE_ARRAY);
600
601   ureg = ureg_create(PIPE_SHADER_FRAGMENT);
602   if (!ureg)
603      return NULL;
604
605   if (!download) {
606      out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
607   } else {
608      struct ureg_src image;
609
610      /* writeonly images do not require an explicitly given format. */
611      image = ureg_DECL_image(ureg, 0, TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE,
612                                    true, false);
613      out = ureg_dst(image);
614   }
615
616   sampler = ureg_DECL_sampler(ureg, 0);
617   if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
618      pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
619   } else {
620      pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
621                               TGSI_INTERPOLATE_LINEAR);
622   }
623   if (have_layer) {
624      layer = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0,
625                                       TGSI_INTERPOLATE_CONSTANT);
626   }
627   const0  = ureg_DECL_constant(ureg, 0);
628   const1  = ureg_DECL_constant(ureg, 1);
629   temp0   = ureg_DECL_temporary(ureg);
630
631   /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */
632
633   /* temp0.xy = f2i(temp0.xy) */
634   ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
635                  ureg_swizzle(pos,
636                               TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
637                               TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
638
639   /* temp0.xy = temp0.xy + const0.xy */
640   ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY),
641                   ureg_swizzle(ureg_src(temp0),
642                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
643                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
644                   ureg_swizzle(const0,
645                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
646                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
647
648   /* temp0.x = const0.z * temp0.y + temp0.x */
649   ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
650                   ureg_scalar(const0, TGSI_SWIZZLE_Z),
651                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y),
652                   ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
653
654   if (have_layer) {
655      /* temp0.x = const0.w * layer + temp0.x */
656      ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X),
657                      ureg_scalar(const0, TGSI_SWIZZLE_W),
658                      ureg_scalar(layer, TGSI_SWIZZLE_X),
659                      ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X));
660   }
661
662   /* temp0.w = 0 */
663   ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0));
664
665   if (download) {
666      struct ureg_dst temp1;
667      struct ureg_src op[2];
668
669      temp1 = ureg_DECL_temporary(ureg);
670
671      /* temp1.xy = pos.xy */
672      ureg_F2I(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_XY), pos);
673
674      /* temp1.zw = 0 */
675      ureg_MOV(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_ZW), ureg_imm1u(ureg, 0));
676
677      if (have_layer) {
678         struct ureg_dst temp1_layer =
679            ureg_writemask(temp1, target == PIPE_TEXTURE_1D_ARRAY ? TGSI_WRITEMASK_Y
680                                                                  : TGSI_WRITEMASK_Z);
681
682         /* temp1.y/z = layer */
683         ureg_MOV(ureg, temp1_layer, ureg_scalar(layer, TGSI_SWIZZLE_X));
684
685         if (target == PIPE_TEXTURE_3D) {
686            /* temp1.z += layer_offset */
687            ureg_UADD(ureg, temp1_layer,
688                            ureg_scalar(ureg_src(temp1), TGSI_SWIZZLE_Z),
689                            ureg_scalar(const1, TGSI_SWIZZLE_X));
690         }
691      }
692
693      /* temp1 = txf(sampler, temp1) */
694      ureg_TXF(ureg, temp1, util_pipe_tex_to_tgsi_tex(target, 1),
695                     ureg_src(temp1), sampler);
696
697      build_conversion(ureg, &temp1, conversion);
698
699      /* store(out, temp0, temp1) */
700      op[0] = ureg_src(temp0);
701      op[1] = ureg_src(temp1);
702      ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &out, 1, op, 2, 0,
703                             TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE);
704
705      ureg_release_temporary(ureg, temp1);
706   } else {
707      /* out = txf(sampler, temp0.x) */
708      ureg_TXF(ureg, temp0, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler);
709
710      build_conversion(ureg, &temp0, conversion);
711
712      ureg_MOV(ureg, out, ureg_src(temp0));
713   }
714
715   ureg_release_temporary(ureg, temp0);
716
717   ureg_END(ureg);
718
719   return ureg_create_shader_and_destroy(ureg, pipe);
720}
721
722static void *
723create_fs(struct st_context *st, bool download,
724          enum pipe_texture_target target,
725          enum st_pbo_conversion conversion)
726{
727   struct pipe_screen *pscreen = st->pipe->screen;
728   bool use_nir = PIPE_SHADER_IR_NIR ==
729      pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX,
730                                PIPE_SHADER_CAP_PREFERRED_IR);
731
732   if (use_nir)
733      return create_fs_nir(st, download, target, conversion);
734
735   return create_fs_tgsi(st, download, target, conversion);
736}
737
738static enum st_pbo_conversion
739get_pbo_conversion(enum pipe_format src_format, enum pipe_format dst_format)
740{
741   if (util_format_is_pure_uint(src_format)) {
742      if (util_format_is_pure_sint(dst_format))
743         return ST_PBO_CONVERT_UINT_TO_SINT;
744   } else if (util_format_is_pure_sint(src_format)) {
745      if (util_format_is_pure_uint(dst_format))
746         return ST_PBO_CONVERT_SINT_TO_UINT;
747   }
748
749   return ST_PBO_CONVERT_NONE;
750}
751
752void *
753st_pbo_get_upload_fs(struct st_context *st,
754                     enum pipe_format src_format,
755                     enum pipe_format dst_format)
756{
757   STATIC_ASSERT(ARRAY_SIZE(st->pbo.upload_fs) == ST_NUM_PBO_CONVERSIONS);
758
759   enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format);
760
761   if (!st->pbo.upload_fs[conversion])
762      st->pbo.upload_fs[conversion] = create_fs(st, false, 0, conversion);
763
764   return st->pbo.upload_fs[conversion];
765}
766
767void *
768st_pbo_get_download_fs(struct st_context *st, enum pipe_texture_target target,
769                       enum pipe_format src_format,
770                       enum pipe_format dst_format)
771{
772   STATIC_ASSERT(ARRAY_SIZE(st->pbo.download_fs) == ST_NUM_PBO_CONVERSIONS);
773   assert(target < PIPE_MAX_TEXTURE_TYPES);
774
775   enum st_pbo_conversion conversion = get_pbo_conversion(src_format, dst_format);
776
777   if (!st->pbo.download_fs[conversion][target])
778      st->pbo.download_fs[conversion][target] = create_fs(st, true, target, conversion);
779
780   return st->pbo.download_fs[conversion][target];
781}
782
783void
784st_init_pbo_helpers(struct st_context *st)
785{
786   struct pipe_context *pipe = st->pipe;
787   struct pipe_screen *screen = pipe->screen;
788
789   st->pbo.upload_enabled =
790      screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) &&
791      screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) >= 1 &&
792      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_INTEGERS);
793   if (!st->pbo.upload_enabled)
794      return;
795
796   st->pbo.download_enabled =
797      st->pbo.upload_enabled &&
798      screen->get_param(screen, PIPE_CAP_SAMPLER_VIEW_TARGET) &&
799      screen->get_param(screen, PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT) &&
800      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
801                                       PIPE_SHADER_CAP_MAX_SHADER_IMAGES) >= 1;
802
803   st->pbo.rgba_only =
804      screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY);
805
806   if (screen->get_param(screen, PIPE_CAP_TGSI_INSTANCEID)) {
807      if (screen->get_param(screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) {
808         st->pbo.layers = true;
809      } else if (screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES) >= 3) {
810         st->pbo.layers = true;
811         st->pbo.use_gs = true;
812      }
813   }
814
815   /* Blend state */
816   memset(&st->pbo.upload_blend, 0, sizeof(struct pipe_blend_state));
817   st->pbo.upload_blend.rt[0].colormask = PIPE_MASK_RGBA;
818
819   /* Rasterizer state */
820   memset(&st->pbo.raster, 0, sizeof(struct pipe_rasterizer_state));
821   st->pbo.raster.half_pixel_center = 1;
822}
823
824void
825st_destroy_pbo_helpers(struct st_context *st)
826{
827   unsigned i;
828
829   for (i = 0; i < ARRAY_SIZE(st->pbo.upload_fs); ++i) {
830      if (st->pbo.upload_fs[i]) {
831         cso_delete_fragment_shader(st->cso_context, st->pbo.upload_fs[i]);
832         st->pbo.upload_fs[i] = NULL;
833      }
834   }
835
836   for (i = 0; i < ARRAY_SIZE(st->pbo.download_fs); ++i) {
837      for (unsigned j = 0; j < ARRAY_SIZE(st->pbo.download_fs[0]); ++j) {
838         if (st->pbo.download_fs[i][j]) {
839            cso_delete_fragment_shader(st->cso_context, st->pbo.download_fs[i][j]);
840            st->pbo.download_fs[i][j] = NULL;
841         }
842      }
843   }
844
845   if (st->pbo.gs) {
846      cso_delete_geometry_shader(st->cso_context, st->pbo.gs);
847      st->pbo.gs = NULL;
848   }
849
850   if (st->pbo.vs) {
851      cso_delete_vertex_shader(st->cso_context, st->pbo.vs);
852      st->pbo.vs = NULL;
853   }
854}
855