1b8e80941Smrg/*
2b8e80941Smrg * Copyright 2016 Red Hat.
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub
8b8e80941Smrg * license, and/or sell copies of the Software, and to permit persons to whom
9b8e80941Smrg * the Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19b8e80941Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20b8e80941Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21b8e80941Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#include "sp_context.h"
25b8e80941Smrg#include "sp_image.h"
26b8e80941Smrg#include "sp_texture.h"
27b8e80941Smrg
28b8e80941Smrg#include "util/u_format.h"
29b8e80941Smrg
30b8e80941Smrg/*
31b8e80941Smrg * Get the offset into the base image
32b8e80941Smrg * first element for a buffer or layer/level for texture.
33b8e80941Smrg */
34b8e80941Smrgstatic uint32_t
35b8e80941Smrgget_image_offset(const struct softpipe_resource *spr,
36b8e80941Smrg                 const struct pipe_image_view *iview,
37b8e80941Smrg                 enum pipe_format format, unsigned r_coord)
38b8e80941Smrg{
39b8e80941Smrg   int base_layer = 0;
40b8e80941Smrg
41b8e80941Smrg   if (spr->base.target == PIPE_BUFFER)
42b8e80941Smrg      return iview->u.buf.offset;
43b8e80941Smrg
44b8e80941Smrg   if (spr->base.target == PIPE_TEXTURE_1D_ARRAY ||
45b8e80941Smrg       spr->base.target == PIPE_TEXTURE_2D_ARRAY ||
46b8e80941Smrg       spr->base.target == PIPE_TEXTURE_CUBE_ARRAY ||
47b8e80941Smrg       spr->base.target == PIPE_TEXTURE_CUBE ||
48b8e80941Smrg       spr->base.target == PIPE_TEXTURE_3D)
49b8e80941Smrg      base_layer = r_coord + iview->u.tex.first_layer;
50b8e80941Smrg   return softpipe_get_tex_image_offset(spr, iview->u.tex.level, base_layer);
51b8e80941Smrg}
52b8e80941Smrg
53b8e80941Smrg/*
54b8e80941Smrg * Does this texture instruction have a layer or depth parameter.
55b8e80941Smrg */
56b8e80941Smrgstatic inline bool
57b8e80941Smrghas_layer_or_depth(unsigned tgsi_tex_instr)
58b8e80941Smrg{
59b8e80941Smrg   return (tgsi_tex_instr == TGSI_TEXTURE_3D ||
60b8e80941Smrg           tgsi_tex_instr == TGSI_TEXTURE_CUBE ||
61b8e80941Smrg           tgsi_tex_instr == TGSI_TEXTURE_1D_ARRAY ||
62b8e80941Smrg           tgsi_tex_instr == TGSI_TEXTURE_2D_ARRAY ||
63b8e80941Smrg           tgsi_tex_instr == TGSI_TEXTURE_CUBE_ARRAY ||
64b8e80941Smrg           tgsi_tex_instr == TGSI_TEXTURE_2D_ARRAY_MSAA);
65b8e80941Smrg}
66b8e80941Smrg
67b8e80941Smrg/*
68b8e80941Smrg * Is this texture instruction a single non-array coordinate.
69b8e80941Smrg */
70b8e80941Smrgstatic inline bool
71b8e80941Smrghas_1coord(unsigned tgsi_tex_instr)
72b8e80941Smrg{
73b8e80941Smrg   return (tgsi_tex_instr == TGSI_TEXTURE_BUFFER ||
74b8e80941Smrg           tgsi_tex_instr == TGSI_TEXTURE_1D ||
75b8e80941Smrg           tgsi_tex_instr == TGSI_TEXTURE_1D_ARRAY);
76b8e80941Smrg}
77b8e80941Smrg
/*
 * Return true when (s, t, r) lies inside a width x height x depth volume,
 * i.e. each coordinate is non-negative and strictly below its extent.
 */
static inline bool
bounds_check(int width, int height, int depth,
             int s, int t, int r)
{
   const bool s_inside = (s >= 0) && (s < width);
   const bool t_inside = (t >= 0) && (t < height);
   const bool r_inside = (r >= 0) && (r < depth);

   return s_inside && t_inside && r_inside;
}
93b8e80941Smrg
94b8e80941Smrg/*
95b8e80941Smrg * Checks if the texture target compatible with the image resource
96b8e80941Smrg * pipe target.
97b8e80941Smrg */
98b8e80941Smrgstatic inline bool
99b8e80941Smrghas_compat_target(unsigned pipe_target, unsigned tgsi_target)
100b8e80941Smrg{
101b8e80941Smrg   switch (pipe_target) {
102b8e80941Smrg   case PIPE_TEXTURE_1D:
103b8e80941Smrg      if (tgsi_target == TGSI_TEXTURE_1D)
104b8e80941Smrg         return true;
105b8e80941Smrg      break;
106b8e80941Smrg   case PIPE_TEXTURE_2D:
107b8e80941Smrg      if (tgsi_target == TGSI_TEXTURE_2D)
108b8e80941Smrg         return true;
109b8e80941Smrg      break;
110b8e80941Smrg   case PIPE_TEXTURE_RECT:
111b8e80941Smrg      if (tgsi_target == TGSI_TEXTURE_RECT)
112b8e80941Smrg         return true;
113b8e80941Smrg      break;
114b8e80941Smrg   case PIPE_TEXTURE_3D:
115b8e80941Smrg      if (tgsi_target == TGSI_TEXTURE_3D ||
116b8e80941Smrg          tgsi_target == TGSI_TEXTURE_2D)
117b8e80941Smrg         return true;
118b8e80941Smrg      break;
119b8e80941Smrg   case PIPE_TEXTURE_CUBE:
120b8e80941Smrg      if (tgsi_target == TGSI_TEXTURE_CUBE ||
121b8e80941Smrg          tgsi_target == TGSI_TEXTURE_2D)
122b8e80941Smrg         return true;
123b8e80941Smrg      break;
124b8e80941Smrg   case PIPE_TEXTURE_1D_ARRAY:
125b8e80941Smrg      if (tgsi_target == TGSI_TEXTURE_1D ||
126b8e80941Smrg          tgsi_target == TGSI_TEXTURE_1D_ARRAY)
127b8e80941Smrg         return true;
128b8e80941Smrg      break;
129b8e80941Smrg   case PIPE_TEXTURE_2D_ARRAY:
130b8e80941Smrg      if (tgsi_target == TGSI_TEXTURE_2D ||
131b8e80941Smrg          tgsi_target == TGSI_TEXTURE_2D_ARRAY)
132b8e80941Smrg         return true;
133b8e80941Smrg      break;
134b8e80941Smrg   case PIPE_TEXTURE_CUBE_ARRAY:
135b8e80941Smrg      if (tgsi_target == TGSI_TEXTURE_CUBE ||
136b8e80941Smrg          tgsi_target == TGSI_TEXTURE_CUBE_ARRAY ||
137b8e80941Smrg          tgsi_target == TGSI_TEXTURE_2D)
138b8e80941Smrg         return true;
139b8e80941Smrg      break;
140b8e80941Smrg   case PIPE_BUFFER:
141b8e80941Smrg      return (tgsi_target == TGSI_TEXTURE_BUFFER);
142b8e80941Smrg   }
143b8e80941Smrg   return false;
144b8e80941Smrg}
145b8e80941Smrg
146b8e80941Smrgstatic bool
147b8e80941Smrgget_dimensions(const struct pipe_image_view *iview,
148b8e80941Smrg               const struct softpipe_resource *spr,
149b8e80941Smrg               unsigned tgsi_tex_instr,
150b8e80941Smrg               enum pipe_format pformat,
151b8e80941Smrg               unsigned *width,
152b8e80941Smrg               unsigned *height,
153b8e80941Smrg               unsigned *depth)
154b8e80941Smrg{
155b8e80941Smrg   if (tgsi_tex_instr == TGSI_TEXTURE_BUFFER) {
156b8e80941Smrg      *width = iview->u.buf.size / util_format_get_blocksize(pformat);
157b8e80941Smrg      *height = 1;
158b8e80941Smrg      *depth = 1;
159b8e80941Smrg      /*
160b8e80941Smrg       * Bounds check the buffer size from the view
161b8e80941Smrg       * and the buffer size from the underlying buffer.
162b8e80941Smrg       */
163b8e80941Smrg      if (util_format_get_stride(pformat, *width) >
164b8e80941Smrg          util_format_get_stride(spr->base.format, spr->base.width0))
165b8e80941Smrg         return false;
166b8e80941Smrg   } else {
167b8e80941Smrg      unsigned level;
168b8e80941Smrg
169b8e80941Smrg      level = spr->base.target == PIPE_BUFFER ? 0 : iview->u.tex.level;
170b8e80941Smrg      *width = u_minify(spr->base.width0, level);
171b8e80941Smrg      *height = u_minify(spr->base.height0, level);
172b8e80941Smrg
173b8e80941Smrg      if (spr->base.target == PIPE_TEXTURE_3D)
174b8e80941Smrg         *depth = u_minify(spr->base.depth0, level);
175b8e80941Smrg      else
176b8e80941Smrg         *depth = spr->base.array_size;
177b8e80941Smrg
178b8e80941Smrg      /* Make sure the resource and view have compatiable formats */
179b8e80941Smrg      if (util_format_get_blocksize(pformat) >
180b8e80941Smrg          util_format_get_blocksize(spr->base.format))
181b8e80941Smrg         return false;
182b8e80941Smrg   }
183b8e80941Smrg   return true;
184b8e80941Smrg}
185b8e80941Smrg
186b8e80941Smrgstatic void
187b8e80941Smrgfill_coords(const struct tgsi_image_params *params,
188b8e80941Smrg            unsigned index,
189b8e80941Smrg            const int s[TGSI_QUAD_SIZE],
190b8e80941Smrg            const int t[TGSI_QUAD_SIZE],
191b8e80941Smrg            const int r[TGSI_QUAD_SIZE],
192b8e80941Smrg            int *s_coord, int *t_coord, int *r_coord)
193b8e80941Smrg{
194b8e80941Smrg   *s_coord = s[index];
195b8e80941Smrg   *t_coord = has_1coord(params->tgsi_tex_instr) ? 0 : t[index];
196b8e80941Smrg   *r_coord = has_layer_or_depth(params->tgsi_tex_instr) ?
197b8e80941Smrg      (params->tgsi_tex_instr == TGSI_TEXTURE_1D_ARRAY ? t[index] : r[index]) : 0;
198b8e80941Smrg}
199b8e80941Smrg/*
200b8e80941Smrg * Implement the image LOAD operation.
201b8e80941Smrg */
202b8e80941Smrgstatic void
203b8e80941Smrgsp_tgsi_load(const struct tgsi_image *image,
204b8e80941Smrg             const struct tgsi_image_params *params,
205b8e80941Smrg             const int s[TGSI_QUAD_SIZE],
206b8e80941Smrg             const int t[TGSI_QUAD_SIZE],
207b8e80941Smrg             const int r[TGSI_QUAD_SIZE],
208b8e80941Smrg             const int sample[TGSI_QUAD_SIZE],
209b8e80941Smrg             float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
210b8e80941Smrg{
211b8e80941Smrg   struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image;
212b8e80941Smrg   struct pipe_image_view *iview;
213b8e80941Smrg   struct softpipe_resource *spr;
214b8e80941Smrg   unsigned width, height, depth;
215b8e80941Smrg   unsigned stride;
216b8e80941Smrg   int c, j;
217b8e80941Smrg   char *data_ptr;
218b8e80941Smrg   unsigned offset = 0;
219b8e80941Smrg
220b8e80941Smrg   if (params->unit >= PIPE_MAX_SHADER_IMAGES)
221b8e80941Smrg      goto fail_write_all_zero;
222b8e80941Smrg   iview = &sp_img->sp_iview[params->unit];
223b8e80941Smrg   spr = (struct softpipe_resource *)iview->resource;
224b8e80941Smrg   if (!spr)
225b8e80941Smrg      goto fail_write_all_zero;
226b8e80941Smrg
227b8e80941Smrg   if (!has_compat_target(spr->base.target, params->tgsi_tex_instr))
228b8e80941Smrg      goto fail_write_all_zero;
229b8e80941Smrg
230b8e80941Smrg   if (!get_dimensions(iview, spr, params->tgsi_tex_instr,
231b8e80941Smrg                       params->format, &width, &height, &depth))
232b8e80941Smrg      return;
233b8e80941Smrg
234b8e80941Smrg   stride = util_format_get_stride(params->format, width);
235b8e80941Smrg
236b8e80941Smrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
237b8e80941Smrg      int s_coord, t_coord, r_coord;
238b8e80941Smrg      bool fill_zero = false;
239b8e80941Smrg
240b8e80941Smrg      if (!(params->execmask & (1 << j)))
241b8e80941Smrg         fill_zero = true;
242b8e80941Smrg
243b8e80941Smrg      fill_coords(params, j, s, t, r, &s_coord, &t_coord, &r_coord);
244b8e80941Smrg      if (!bounds_check(width, height, depth,
245b8e80941Smrg                        s_coord, t_coord, r_coord))
246b8e80941Smrg         fill_zero = true;
247b8e80941Smrg
248b8e80941Smrg      if (fill_zero) {
249b8e80941Smrg         int nc = util_format_get_nr_components(params->format);
250b8e80941Smrg         int ival = util_format_is_pure_integer(params->format);
251b8e80941Smrg         for (c = 0; c < 4; c++) {
252b8e80941Smrg            rgba[c][j] = 0;
253b8e80941Smrg            if (c == 3 && nc < 4) {
254b8e80941Smrg               if (ival)
255b8e80941Smrg                  ((int32_t *)rgba[c])[j] = 1;
256b8e80941Smrg               else
257b8e80941Smrg                  rgba[c][j] = 1.0;
258b8e80941Smrg            }
259b8e80941Smrg         }
260b8e80941Smrg         continue;
261b8e80941Smrg      }
262b8e80941Smrg      offset = get_image_offset(spr, iview, params->format, r_coord);
263b8e80941Smrg      data_ptr = (char *)spr->data + offset;
264b8e80941Smrg
265b8e80941Smrg      if (util_format_is_pure_sint(params->format)) {
266b8e80941Smrg         int32_t sdata[4];
267b8e80941Smrg
268b8e80941Smrg         util_format_read_4i(params->format,
269b8e80941Smrg                             sdata, 0,
270b8e80941Smrg                             data_ptr, stride,
271b8e80941Smrg                             s_coord, t_coord, 1, 1);
272b8e80941Smrg         for (c = 0; c < 4; c++)
273b8e80941Smrg            ((int32_t *)rgba[c])[j] = sdata[c];
274b8e80941Smrg      } else if (util_format_is_pure_uint(params->format)) {
275b8e80941Smrg         uint32_t sdata[4];
276b8e80941Smrg         util_format_read_4ui(params->format,
277b8e80941Smrg                             sdata, 0,
278b8e80941Smrg                             data_ptr, stride,
279b8e80941Smrg                             s_coord, t_coord, 1, 1);
280b8e80941Smrg         for (c = 0; c < 4; c++)
281b8e80941Smrg            ((uint32_t *)rgba[c])[j] = sdata[c];
282b8e80941Smrg      } else {
283b8e80941Smrg         float sdata[4];
284b8e80941Smrg         util_format_read_4f(params->format,
285b8e80941Smrg                             sdata, 0,
286b8e80941Smrg                             data_ptr, stride,
287b8e80941Smrg                             s_coord, t_coord, 1, 1);
288b8e80941Smrg         for (c = 0; c < 4; c++)
289b8e80941Smrg            rgba[c][j] = sdata[c];
290b8e80941Smrg      }
291b8e80941Smrg   }
292b8e80941Smrg   return;
293b8e80941Smrgfail_write_all_zero:
294b8e80941Smrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
295b8e80941Smrg      for (c = 0; c < 4; c++)
296b8e80941Smrg         rgba[c][j] = 0;
297b8e80941Smrg   }
298b8e80941Smrg   return;
299b8e80941Smrg}
300b8e80941Smrg
301b8e80941Smrg/*
302b8e80941Smrg * Implement the image STORE operation.
303b8e80941Smrg */
304b8e80941Smrgstatic void
305b8e80941Smrgsp_tgsi_store(const struct tgsi_image *image,
306b8e80941Smrg              const struct tgsi_image_params *params,
307b8e80941Smrg              const int s[TGSI_QUAD_SIZE],
308b8e80941Smrg              const int t[TGSI_QUAD_SIZE],
309b8e80941Smrg              const int r[TGSI_QUAD_SIZE],
310b8e80941Smrg              const int sample[TGSI_QUAD_SIZE],
311b8e80941Smrg              float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
312b8e80941Smrg{
313b8e80941Smrg   struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image;
314b8e80941Smrg   struct pipe_image_view *iview;
315b8e80941Smrg   struct softpipe_resource *spr;
316b8e80941Smrg   unsigned width, height, depth;
317b8e80941Smrg   unsigned stride;
318b8e80941Smrg   char *data_ptr;
319b8e80941Smrg   int j, c;
320b8e80941Smrg   unsigned offset = 0;
321b8e80941Smrg   unsigned pformat = params->format;
322b8e80941Smrg
323b8e80941Smrg   if (params->unit >= PIPE_MAX_SHADER_IMAGES)
324b8e80941Smrg      return;
325b8e80941Smrg   iview = &sp_img->sp_iview[params->unit];
326b8e80941Smrg   spr = (struct softpipe_resource *)iview->resource;
327b8e80941Smrg   if (!spr)
328b8e80941Smrg      return;
329b8e80941Smrg   if (!has_compat_target(spr->base.target, params->tgsi_tex_instr))
330b8e80941Smrg      return;
331b8e80941Smrg
332b8e80941Smrg   if (params->format == PIPE_FORMAT_NONE)
333b8e80941Smrg      pformat = spr->base.format;
334b8e80941Smrg
335b8e80941Smrg   if (!get_dimensions(iview, spr, params->tgsi_tex_instr,
336b8e80941Smrg                       pformat, &width, &height, &depth))
337b8e80941Smrg      return;
338b8e80941Smrg
339b8e80941Smrg   stride = util_format_get_stride(pformat, width);
340b8e80941Smrg
341b8e80941Smrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
342b8e80941Smrg      int s_coord, t_coord, r_coord;
343b8e80941Smrg
344b8e80941Smrg      if (!(params->execmask & (1 << j)))
345b8e80941Smrg         continue;
346b8e80941Smrg
347b8e80941Smrg      fill_coords(params, j, s, t, r, &s_coord, &t_coord, &r_coord);
348b8e80941Smrg      if (!bounds_check(width, height, depth,
349b8e80941Smrg                        s_coord, t_coord, r_coord))
350b8e80941Smrg         continue;
351b8e80941Smrg
352b8e80941Smrg      offset = get_image_offset(spr, iview, pformat, r_coord);
353b8e80941Smrg      data_ptr = (char *)spr->data + offset;
354b8e80941Smrg
355b8e80941Smrg      if (util_format_is_pure_sint(pformat)) {
356b8e80941Smrg         int32_t sdata[4];
357b8e80941Smrg         for (c = 0; c < 4; c++)
358b8e80941Smrg            sdata[c] = ((int32_t *)rgba[c])[j];
359b8e80941Smrg         util_format_write_4i(pformat, sdata, 0, data_ptr, stride,
360b8e80941Smrg                              s_coord, t_coord, 1, 1);
361b8e80941Smrg      } else if (util_format_is_pure_uint(pformat)) {
362b8e80941Smrg         uint32_t sdata[4];
363b8e80941Smrg         for (c = 0; c < 4; c++)
364b8e80941Smrg            sdata[c] = ((uint32_t *)rgba[c])[j];
365b8e80941Smrg         util_format_write_4ui(pformat, sdata, 0, data_ptr, stride,
366b8e80941Smrg                               s_coord, t_coord, 1, 1);
367b8e80941Smrg      } else {
368b8e80941Smrg         float sdata[4];
369b8e80941Smrg         for (c = 0; c < 4; c++)
370b8e80941Smrg            sdata[c] = rgba[c][j];
371b8e80941Smrg         util_format_write_4f(pformat, sdata, 0, data_ptr, stride,
372b8e80941Smrg                              s_coord, t_coord, 1, 1);
373b8e80941Smrg      }
374b8e80941Smrg   }
375b8e80941Smrg}
376b8e80941Smrg
377b8e80941Smrg/*
378b8e80941Smrg * Implement atomic operations on unsigned integers.
379b8e80941Smrg */
380b8e80941Smrgstatic void
381b8e80941Smrghandle_op_uint(const struct pipe_image_view *iview,
382b8e80941Smrg               const struct tgsi_image_params *params,
383b8e80941Smrg               bool just_read,
384b8e80941Smrg               char *data_ptr,
385b8e80941Smrg               uint qi,
386b8e80941Smrg               unsigned stride,
387b8e80941Smrg               enum tgsi_opcode opcode,
388b8e80941Smrg               int s,
389b8e80941Smrg               int t,
390b8e80941Smrg               float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
391b8e80941Smrg               float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
392b8e80941Smrg{
393b8e80941Smrg   uint c;
394b8e80941Smrg   int nc = util_format_get_nr_components(params->format);
395b8e80941Smrg   unsigned sdata[4];
396b8e80941Smrg
397b8e80941Smrg   util_format_read_4ui(params->format,
398b8e80941Smrg                        sdata, 0,
399b8e80941Smrg                        data_ptr, stride,
400b8e80941Smrg                        s, t, 1, 1);
401b8e80941Smrg
402b8e80941Smrg   if (just_read) {
403b8e80941Smrg      for (c = 0; c < nc; c++) {
404b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = sdata[c];
405b8e80941Smrg      }
406b8e80941Smrg      return;
407b8e80941Smrg   }
408b8e80941Smrg   switch (opcode) {
409b8e80941Smrg   case TGSI_OPCODE_ATOMUADD:
410b8e80941Smrg      for (c = 0; c < nc; c++) {
411b8e80941Smrg         unsigned temp = sdata[c];
412b8e80941Smrg         sdata[c] += ((uint32_t *)rgba[c])[qi];
413b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
414b8e80941Smrg      }
415b8e80941Smrg      break;
416b8e80941Smrg   case TGSI_OPCODE_ATOMXCHG:
417b8e80941Smrg      for (c = 0; c < nc; c++) {
418b8e80941Smrg         unsigned temp = sdata[c];
419b8e80941Smrg         sdata[c] = ((uint32_t *)rgba[c])[qi];
420b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
421b8e80941Smrg      }
422b8e80941Smrg      break;
423b8e80941Smrg   case TGSI_OPCODE_ATOMCAS:
424b8e80941Smrg      for (c = 0; c < nc; c++) {
425b8e80941Smrg         unsigned dst_x = sdata[c];
426b8e80941Smrg         unsigned cmp_x = ((uint32_t *)rgba[c])[qi];
427b8e80941Smrg         unsigned src_x = ((uint32_t *)rgba2[c])[qi];
428b8e80941Smrg         unsigned temp = sdata[c];
429b8e80941Smrg         sdata[c] = (dst_x == cmp_x) ? src_x : dst_x;
430b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
431b8e80941Smrg      }
432b8e80941Smrg      break;
433b8e80941Smrg   case TGSI_OPCODE_ATOMAND:
434b8e80941Smrg      for (c = 0; c < nc; c++) {
435b8e80941Smrg         unsigned temp = sdata[c];
436b8e80941Smrg         sdata[c] &= ((uint32_t *)rgba[c])[qi];
437b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
438b8e80941Smrg      }
439b8e80941Smrg      break;
440b8e80941Smrg   case TGSI_OPCODE_ATOMOR:
441b8e80941Smrg      for (c = 0; c < nc; c++) {
442b8e80941Smrg         unsigned temp = sdata[c];
443b8e80941Smrg         sdata[c] |= ((uint32_t *)rgba[c])[qi];
444b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
445b8e80941Smrg      }
446b8e80941Smrg      break;
447b8e80941Smrg   case TGSI_OPCODE_ATOMXOR:
448b8e80941Smrg      for (c = 0; c < nc; c++) {
449b8e80941Smrg         unsigned temp = sdata[c];
450b8e80941Smrg         sdata[c] ^= ((uint32_t *)rgba[c])[qi];
451b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
452b8e80941Smrg      }
453b8e80941Smrg      break;
454b8e80941Smrg   case TGSI_OPCODE_ATOMUMIN:
455b8e80941Smrg      for (c = 0; c < nc; c++) {
456b8e80941Smrg         unsigned dst_x = sdata[c];
457b8e80941Smrg         unsigned src_x = ((uint32_t *)rgba[c])[qi];
458b8e80941Smrg         sdata[c] = MIN2(dst_x, src_x);
459b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = dst_x;
460b8e80941Smrg      }
461b8e80941Smrg      break;
462b8e80941Smrg   case TGSI_OPCODE_ATOMUMAX:
463b8e80941Smrg      for (c = 0; c < nc; c++) {
464b8e80941Smrg         unsigned dst_x = sdata[c];
465b8e80941Smrg         unsigned src_x = ((uint32_t *)rgba[c])[qi];
466b8e80941Smrg         sdata[c] = MAX2(dst_x, src_x);
467b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = dst_x;
468b8e80941Smrg      }
469b8e80941Smrg      break;
470b8e80941Smrg   case TGSI_OPCODE_ATOMIMIN:
471b8e80941Smrg      for (c = 0; c < nc; c++) {
472b8e80941Smrg         int dst_x = sdata[c];
473b8e80941Smrg         int src_x = ((uint32_t *)rgba[c])[qi];
474b8e80941Smrg         sdata[c] = MIN2(dst_x, src_x);
475b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = dst_x;
476b8e80941Smrg      }
477b8e80941Smrg      break;
478b8e80941Smrg   case TGSI_OPCODE_ATOMIMAX:
479b8e80941Smrg      for (c = 0; c < nc; c++) {
480b8e80941Smrg         int dst_x = sdata[c];
481b8e80941Smrg         int src_x = ((uint32_t *)rgba[c])[qi];
482b8e80941Smrg         sdata[c] = MAX2(dst_x, src_x);
483b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = dst_x;
484b8e80941Smrg      }
485b8e80941Smrg      break;
486b8e80941Smrg   default:
487b8e80941Smrg      assert(!"Unexpected TGSI opcode in sp_tgsi_op");
488b8e80941Smrg      break;
489b8e80941Smrg   }
490b8e80941Smrg   util_format_write_4ui(params->format, sdata, 0, data_ptr, stride,
491b8e80941Smrg                         s, t, 1, 1);
492b8e80941Smrg}
493b8e80941Smrg
494b8e80941Smrg/*
495b8e80941Smrg * Implement atomic operations on signed integers.
496b8e80941Smrg */
497b8e80941Smrgstatic void
498b8e80941Smrghandle_op_int(const struct pipe_image_view *iview,
499b8e80941Smrg              const struct tgsi_image_params *params,
500b8e80941Smrg              bool just_read,
501b8e80941Smrg              char *data_ptr,
502b8e80941Smrg              uint qi,
503b8e80941Smrg              unsigned stride,
504b8e80941Smrg              enum tgsi_opcode opcode,
505b8e80941Smrg              int s,
506b8e80941Smrg              int t,
507b8e80941Smrg              float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
508b8e80941Smrg              float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
509b8e80941Smrg{
510b8e80941Smrg   uint c;
511b8e80941Smrg   int nc = util_format_get_nr_components(params->format);
512b8e80941Smrg   int sdata[4];
513b8e80941Smrg   util_format_read_4i(params->format,
514b8e80941Smrg                       sdata, 0,
515b8e80941Smrg                       data_ptr, stride,
516b8e80941Smrg                       s, t, 1, 1);
517b8e80941Smrg
518b8e80941Smrg   if (just_read) {
519b8e80941Smrg      for (c = 0; c < nc; c++) {
520b8e80941Smrg         ((int32_t *)rgba[c])[qi] = sdata[c];
521b8e80941Smrg      }
522b8e80941Smrg      return;
523b8e80941Smrg   }
524b8e80941Smrg   switch (opcode) {
525b8e80941Smrg   case TGSI_OPCODE_ATOMUADD:
526b8e80941Smrg      for (c = 0; c < nc; c++) {
527b8e80941Smrg         int temp = sdata[c];
528b8e80941Smrg         sdata[c] += ((int32_t *)rgba[c])[qi];
529b8e80941Smrg         ((int32_t *)rgba[c])[qi] = temp;
530b8e80941Smrg      }
531b8e80941Smrg      break;
532b8e80941Smrg   case TGSI_OPCODE_ATOMXCHG:
533b8e80941Smrg      for (c = 0; c < nc; c++) {
534b8e80941Smrg         int temp = sdata[c];
535b8e80941Smrg         sdata[c] = ((int32_t *)rgba[c])[qi];
536b8e80941Smrg         ((int32_t *)rgba[c])[qi] = temp;
537b8e80941Smrg      }
538b8e80941Smrg      break;
539b8e80941Smrg   case TGSI_OPCODE_ATOMCAS:
540b8e80941Smrg      for (c = 0; c < nc; c++) {
541b8e80941Smrg         int dst_x = sdata[c];
542b8e80941Smrg         int cmp_x = ((int32_t *)rgba[c])[qi];
543b8e80941Smrg         int src_x = ((int32_t *)rgba2[c])[qi];
544b8e80941Smrg         int temp = sdata[c];
545b8e80941Smrg         sdata[c] = (dst_x == cmp_x) ? src_x : dst_x;
546b8e80941Smrg         ((int32_t *)rgba[c])[qi] = temp;
547b8e80941Smrg      }
548b8e80941Smrg      break;
549b8e80941Smrg   case TGSI_OPCODE_ATOMAND:
550b8e80941Smrg      for (c = 0; c < nc; c++) {
551b8e80941Smrg         int temp = sdata[c];
552b8e80941Smrg         sdata[c] &= ((int32_t *)rgba[c])[qi];
553b8e80941Smrg         ((int32_t *)rgba[c])[qi] = temp;
554b8e80941Smrg      }
555b8e80941Smrg      break;
556b8e80941Smrg   case TGSI_OPCODE_ATOMOR:
557b8e80941Smrg      for (c = 0; c < nc; c++) {
558b8e80941Smrg         int temp = sdata[c];
559b8e80941Smrg         sdata[c] |= ((int32_t *)rgba[c])[qi];
560b8e80941Smrg         ((int32_t *)rgba[c])[qi] = temp;
561b8e80941Smrg      }
562b8e80941Smrg      break;
563b8e80941Smrg   case TGSI_OPCODE_ATOMXOR:
564b8e80941Smrg      for (c = 0; c < nc; c++) {
565b8e80941Smrg         int temp = sdata[c];
566b8e80941Smrg         sdata[c] ^= ((int32_t *)rgba[c])[qi];
567b8e80941Smrg         ((int32_t *)rgba[c])[qi] = temp;
568b8e80941Smrg      }
569b8e80941Smrg      break;
570b8e80941Smrg   case TGSI_OPCODE_ATOMUMIN:
571b8e80941Smrg      for (c = 0; c < nc; c++) {
572b8e80941Smrg         int dst_x = sdata[c];
573b8e80941Smrg         int src_x = ((int32_t *)rgba[c])[qi];
574b8e80941Smrg         sdata[c] = MIN2(dst_x, src_x);
575b8e80941Smrg         ((int32_t *)rgba[c])[qi] = dst_x;
576b8e80941Smrg      }
577b8e80941Smrg      break;
578b8e80941Smrg   case TGSI_OPCODE_ATOMUMAX:
579b8e80941Smrg      for (c = 0; c < nc; c++) {
580b8e80941Smrg         int dst_x = sdata[c];
581b8e80941Smrg         int src_x = ((int32_t *)rgba[c])[qi];
582b8e80941Smrg         sdata[c] = MAX2(dst_x, src_x);
583b8e80941Smrg         ((int32_t *)rgba[c])[qi] = dst_x;
584b8e80941Smrg      }
585b8e80941Smrg      break;
586b8e80941Smrg   case TGSI_OPCODE_ATOMIMIN:
587b8e80941Smrg      for (c = 0; c < nc; c++) {
588b8e80941Smrg         int dst_x = sdata[c];
589b8e80941Smrg         int src_x = ((int32_t *)rgba[c])[qi];
590b8e80941Smrg         sdata[c] = MIN2(dst_x, src_x);
591b8e80941Smrg         ((int32_t *)rgba[c])[qi] = dst_x;
592b8e80941Smrg      }
593b8e80941Smrg      break;
594b8e80941Smrg   case TGSI_OPCODE_ATOMIMAX:
595b8e80941Smrg      for (c = 0; c < nc; c++) {
596b8e80941Smrg         int dst_x = sdata[c];
597b8e80941Smrg         int src_x = ((int32_t *)rgba[c])[qi];
598b8e80941Smrg         sdata[c] = MAX2(dst_x, src_x);
599b8e80941Smrg         ((int32_t *)rgba[c])[qi] = dst_x;
600b8e80941Smrg      }
601b8e80941Smrg      break;
602b8e80941Smrg   default:
603b8e80941Smrg      assert(!"Unexpected TGSI opcode in sp_tgsi_op");
604b8e80941Smrg      break;
605b8e80941Smrg   }
606b8e80941Smrg   util_format_write_4i(params->format, sdata, 0, data_ptr, stride,
607b8e80941Smrg                        s, t, 1, 1);
608b8e80941Smrg}
609b8e80941Smrg
610b8e80941Smrg/* GLES OES_shader_image_atomic.txt allows XCHG on R32F */
611b8e80941Smrgstatic void
612b8e80941Smrghandle_op_r32f_xchg(const struct pipe_image_view *iview,
613b8e80941Smrg                    const struct tgsi_image_params *params,
614b8e80941Smrg                    bool just_read,
615b8e80941Smrg                    char *data_ptr,
616b8e80941Smrg                    uint qi,
617b8e80941Smrg                    unsigned stride,
618b8e80941Smrg                    enum tgsi_opcode opcode,
619b8e80941Smrg                    int s,
620b8e80941Smrg                    int t,
621b8e80941Smrg                    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
622b8e80941Smrg{
623b8e80941Smrg   float sdata[4];
624b8e80941Smrg   uint c;
625b8e80941Smrg   int nc = 1;
626b8e80941Smrg   util_format_read_4f(params->format,
627b8e80941Smrg                       sdata, 0,
628b8e80941Smrg                       data_ptr, stride,
629b8e80941Smrg                       s, t, 1, 1);
630b8e80941Smrg   if (just_read) {
631b8e80941Smrg      for (c = 0; c < nc; c++) {
632b8e80941Smrg         ((int32_t *)rgba[c])[qi] = sdata[c];
633b8e80941Smrg      }
634b8e80941Smrg      return;
635b8e80941Smrg   }
636b8e80941Smrg
637b8e80941Smrg   for (c = 0; c < nc; c++) {
638b8e80941Smrg      int temp = sdata[c];
639b8e80941Smrg      sdata[c] = ((float *)rgba[c])[qi];
640b8e80941Smrg      ((float *)rgba[c])[qi] = temp;
641b8e80941Smrg   }
642b8e80941Smrg   util_format_write_4f(params->format, sdata, 0, data_ptr, stride,
643b8e80941Smrg                        s, t, 1, 1);
644b8e80941Smrg}
645b8e80941Smrg
646b8e80941Smrg/*
647b8e80941Smrg * Implement atomic image operations.
648b8e80941Smrg */
649b8e80941Smrgstatic void
650b8e80941Smrgsp_tgsi_op(const struct tgsi_image *image,
651b8e80941Smrg           const struct tgsi_image_params *params,
652b8e80941Smrg           enum tgsi_opcode opcode,
653b8e80941Smrg           const int s[TGSI_QUAD_SIZE],
654b8e80941Smrg           const int t[TGSI_QUAD_SIZE],
655b8e80941Smrg           const int r[TGSI_QUAD_SIZE],
656b8e80941Smrg           const int sample[TGSI_QUAD_SIZE],
657b8e80941Smrg           float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
658b8e80941Smrg           float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
659b8e80941Smrg{
660b8e80941Smrg   struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image;
661b8e80941Smrg   struct pipe_image_view *iview;
662b8e80941Smrg   struct softpipe_resource *spr;
663b8e80941Smrg   unsigned width, height, depth;
664b8e80941Smrg   unsigned stride;
665b8e80941Smrg   int j, c;
666b8e80941Smrg   unsigned offset;
667b8e80941Smrg   char *data_ptr;
668b8e80941Smrg
669b8e80941Smrg   if (params->unit >= PIPE_MAX_SHADER_IMAGES)
670b8e80941Smrg      return;
671b8e80941Smrg   iview = &sp_img->sp_iview[params->unit];
672b8e80941Smrg   spr = (struct softpipe_resource *)iview->resource;
673b8e80941Smrg   if (!spr)
674b8e80941Smrg      goto fail_write_all_zero;
675b8e80941Smrg   if (!has_compat_target(spr->base.target, params->tgsi_tex_instr))
676b8e80941Smrg      goto fail_write_all_zero;
677b8e80941Smrg
678b8e80941Smrg   if (!get_dimensions(iview, spr, params->tgsi_tex_instr,
679b8e80941Smrg                       params->format, &width, &height, &depth))
680b8e80941Smrg      goto fail_write_all_zero;
681b8e80941Smrg
682b8e80941Smrg   stride = util_format_get_stride(spr->base.format, width);
683b8e80941Smrg
684b8e80941Smrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
685b8e80941Smrg      int s_coord, t_coord, r_coord;
686b8e80941Smrg      bool just_read = false;
687b8e80941Smrg
688b8e80941Smrg      fill_coords(params, j, s, t, r, &s_coord, &t_coord, &r_coord);
689b8e80941Smrg      if (!bounds_check(width, height, depth,
690b8e80941Smrg                        s_coord, t_coord, r_coord)) {
691b8e80941Smrg         int nc = util_format_get_nr_components(params->format);
692b8e80941Smrg         int ival = util_format_is_pure_integer(params->format);
693b8e80941Smrg         int c;
694b8e80941Smrg         for (c = 0; c < 4; c++) {
695b8e80941Smrg            rgba[c][j] = 0;
696b8e80941Smrg            if (c == 3 && nc < 4) {
697b8e80941Smrg               if (ival)
698b8e80941Smrg                  ((int32_t *)rgba[c])[j] = 1;
699b8e80941Smrg               else
700b8e80941Smrg                  rgba[c][j] = 1.0;
701b8e80941Smrg            }
702b8e80941Smrg         }
703b8e80941Smrg         continue;
704b8e80941Smrg      }
705b8e80941Smrg
706b8e80941Smrg      /* just readback the value for atomic if execmask isn't set */
707b8e80941Smrg      if (!(params->execmask & (1 << j))) {
708b8e80941Smrg         just_read = true;
709b8e80941Smrg      }
710b8e80941Smrg
711b8e80941Smrg      offset = get_image_offset(spr, iview, params->format, r_coord);
712b8e80941Smrg      data_ptr = (char *)spr->data + offset;
713b8e80941Smrg
714b8e80941Smrg      /* we should see atomic operations on r32 formats */
715b8e80941Smrg      if (util_format_is_pure_uint(params->format))
716b8e80941Smrg         handle_op_uint(iview, params, just_read, data_ptr, j, stride,
717b8e80941Smrg                        opcode, s_coord, t_coord, rgba, rgba2);
718b8e80941Smrg      else if (util_format_is_pure_sint(params->format))
719b8e80941Smrg         handle_op_int(iview, params, just_read, data_ptr, j, stride,
720b8e80941Smrg                       opcode, s_coord, t_coord, rgba, rgba2);
721b8e80941Smrg      else if (params->format == PIPE_FORMAT_R32_FLOAT &&
722b8e80941Smrg               opcode == TGSI_OPCODE_ATOMXCHG)
723b8e80941Smrg         handle_op_r32f_xchg(iview, params, just_read, data_ptr, j, stride,
724b8e80941Smrg                             opcode, s_coord, t_coord, rgba);
725b8e80941Smrg      else
726b8e80941Smrg         assert(0);
727b8e80941Smrg   }
728b8e80941Smrg   return;
729b8e80941Smrgfail_write_all_zero:
730b8e80941Smrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
731b8e80941Smrg      for (c = 0; c < 4; c++)
732b8e80941Smrg         rgba[c][j] = 0;
733b8e80941Smrg   }
734b8e80941Smrg   return;
735b8e80941Smrg}
736b8e80941Smrg
737b8e80941Smrgstatic void
738b8e80941Smrgsp_tgsi_get_dims(const struct tgsi_image *image,
739b8e80941Smrg                 const struct tgsi_image_params *params,
740b8e80941Smrg                 int dims[4])
741b8e80941Smrg{
742b8e80941Smrg   struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image;
743b8e80941Smrg   struct pipe_image_view *iview;
744b8e80941Smrg   struct softpipe_resource *spr;
745b8e80941Smrg   int level;
746b8e80941Smrg
747b8e80941Smrg   if (params->unit >= PIPE_MAX_SHADER_IMAGES)
748b8e80941Smrg      return;
749b8e80941Smrg   iview = &sp_img->sp_iview[params->unit];
750b8e80941Smrg   spr = (struct softpipe_resource *)iview->resource;
751b8e80941Smrg   if (!spr)
752b8e80941Smrg      return;
753b8e80941Smrg
754b8e80941Smrg   if (params->tgsi_tex_instr == TGSI_TEXTURE_BUFFER) {
755b8e80941Smrg      dims[0] = iview->u.buf.size / util_format_get_blocksize(iview->format);
756b8e80941Smrg      dims[1] = dims[2] = dims[3] = 0;
757b8e80941Smrg      return;
758b8e80941Smrg   }
759b8e80941Smrg
760b8e80941Smrg   level = iview->u.tex.level;
761b8e80941Smrg   dims[0] = u_minify(spr->base.width0, level);
762b8e80941Smrg   switch (params->tgsi_tex_instr) {
763b8e80941Smrg   case TGSI_TEXTURE_1D_ARRAY:
764b8e80941Smrg      dims[1] = iview->u.tex.last_layer - iview->u.tex.first_layer + 1;
765b8e80941Smrg      /* fallthrough */
766b8e80941Smrg   case TGSI_TEXTURE_1D:
767b8e80941Smrg      return;
768b8e80941Smrg   case TGSI_TEXTURE_2D_ARRAY:
769b8e80941Smrg      dims[2] = iview->u.tex.last_layer - iview->u.tex.first_layer + 1;
770b8e80941Smrg      /* fallthrough */
771b8e80941Smrg   case TGSI_TEXTURE_2D:
772b8e80941Smrg   case TGSI_TEXTURE_CUBE:
773b8e80941Smrg   case TGSI_TEXTURE_RECT:
774b8e80941Smrg      dims[1] = u_minify(spr->base.height0, level);
775b8e80941Smrg      return;
776b8e80941Smrg   case TGSI_TEXTURE_3D:
777b8e80941Smrg      dims[1] = u_minify(spr->base.height0, level);
778b8e80941Smrg      dims[2] = u_minify(spr->base.depth0, level);
779b8e80941Smrg      return;
780b8e80941Smrg   case TGSI_TEXTURE_CUBE_ARRAY:
781b8e80941Smrg      dims[1] = u_minify(spr->base.height0, level);
782b8e80941Smrg      dims[2] = (iview->u.tex.last_layer - iview->u.tex.first_layer + 1) / 6;
783b8e80941Smrg      break;
784b8e80941Smrg   default:
785b8e80941Smrg      assert(!"unexpected texture target in sp_get_dims()");
786b8e80941Smrg      return;
787b8e80941Smrg   }
788b8e80941Smrg}
789b8e80941Smrg
790b8e80941Smrgstruct sp_tgsi_image *
791b8e80941Smrgsp_create_tgsi_image(void)
792b8e80941Smrg{
793b8e80941Smrg   struct sp_tgsi_image *img = CALLOC_STRUCT(sp_tgsi_image);
794b8e80941Smrg   if (!img)
795b8e80941Smrg      return NULL;
796b8e80941Smrg
797b8e80941Smrg   img->base.load = sp_tgsi_load;
798b8e80941Smrg   img->base.store = sp_tgsi_store;
799b8e80941Smrg   img->base.op = sp_tgsi_op;
800b8e80941Smrg   img->base.get_dims = sp_tgsi_get_dims;
801b8e80941Smrg   return img;
802b8e80941Smrg};
803