1b8e80941Smrg/*
2b8e80941Smrg * Copyright 2016 Red Hat.
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub
8b8e80941Smrg * license, and/or sell copies of the Software, and to permit persons to whom
9b8e80941Smrg * the Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19b8e80941Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20b8e80941Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21b8e80941Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#include "sp_context.h"
25b8e80941Smrg#include "sp_buffer.h"
26b8e80941Smrg#include "sp_texture.h"
27b8e80941Smrg
28b8e80941Smrg#include "util/u_format.h"
29b8e80941Smrg
30b8e80941Smrgstatic bool
31b8e80941Smrgget_dimensions(const struct pipe_shader_buffer *bview,
32b8e80941Smrg               const struct softpipe_resource *spr,
33b8e80941Smrg               unsigned *width)
34b8e80941Smrg{
35b8e80941Smrg   *width = bview->buffer_size;
36b8e80941Smrg   /*
37b8e80941Smrg    * Bounds check the buffer size from the view
38b8e80941Smrg    * and the buffer size from the underlying buffer.
39b8e80941Smrg    */
40b8e80941Smrg   if (*width > spr->base.width0)
41b8e80941Smrg      return false;
42b8e80941Smrg   return true;
43b8e80941Smrg}
44b8e80941Smrg
45b8e80941Smrg/*
46b8e80941Smrg * Implement the image LOAD operation.
47b8e80941Smrg */
48b8e80941Smrgstatic void
49b8e80941Smrgsp_tgsi_load(const struct tgsi_buffer *buffer,
50b8e80941Smrg             const struct tgsi_buffer_params *params,
51b8e80941Smrg             const int s[TGSI_QUAD_SIZE],
52b8e80941Smrg             float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
53b8e80941Smrg{
54b8e80941Smrg   struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
55b8e80941Smrg   struct pipe_shader_buffer *bview;
56b8e80941Smrg   struct softpipe_resource *spr;
57b8e80941Smrg   unsigned width;
58b8e80941Smrg   int c, j;
59b8e80941Smrg   unsigned char *data_ptr;
60b8e80941Smrg   const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT);
61b8e80941Smrg
62b8e80941Smrg   if (params->unit >= PIPE_MAX_SHADER_BUFFERS)
63b8e80941Smrg      goto fail_write_all_zero;
64b8e80941Smrg
65b8e80941Smrg   bview = &sp_buf->sp_bview[params->unit];
66b8e80941Smrg   spr = softpipe_resource(bview->buffer);
67b8e80941Smrg   if (!spr)
68b8e80941Smrg      goto fail_write_all_zero;
69b8e80941Smrg
70b8e80941Smrg   if (!get_dimensions(bview, spr, &width))
71b8e80941Smrg      return;
72b8e80941Smrg
73b8e80941Smrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
74b8e80941Smrg      int s_coord;
75b8e80941Smrg      bool fill_zero = false;
76b8e80941Smrg      uint32_t sdata[4];
77b8e80941Smrg
78b8e80941Smrg      if (!(params->execmask & (1 << j)))
79b8e80941Smrg         fill_zero = true;
80b8e80941Smrg
81b8e80941Smrg      s_coord = s[j];
82b8e80941Smrg      if (s_coord >= width)
83b8e80941Smrg         fill_zero = true;
84b8e80941Smrg
85b8e80941Smrg      if (fill_zero) {
86b8e80941Smrg         for (c = 0; c < 4; c++)
87b8e80941Smrg            rgba[c][j] = 0;
88b8e80941Smrg         continue;
89b8e80941Smrg      }
90b8e80941Smrg      data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord;
91b8e80941Smrg      for (c = 0; c < 4; c++) {
92b8e80941Smrg         format_desc->fetch_rgba_uint(sdata, data_ptr, 0, 0);
93b8e80941Smrg         ((uint32_t *)rgba[c])[j] = sdata[0];
94b8e80941Smrg         data_ptr += 4;
95b8e80941Smrg      }
96b8e80941Smrg   }
97b8e80941Smrg   return;
98b8e80941Smrgfail_write_all_zero:
99b8e80941Smrg   memset(rgba, 0, TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE * 4);
100b8e80941Smrg   return;
101b8e80941Smrg}
102b8e80941Smrg
103b8e80941Smrg/*
104b8e80941Smrg * Implement the buffer STORE operation.
105b8e80941Smrg */
106b8e80941Smrgstatic void
107b8e80941Smrgsp_tgsi_store(const struct tgsi_buffer *buffer,
108b8e80941Smrg              const struct tgsi_buffer_params *params,
109b8e80941Smrg              const int s[TGSI_QUAD_SIZE],
110b8e80941Smrg              float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
111b8e80941Smrg{
112b8e80941Smrg   struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
113b8e80941Smrg   struct pipe_shader_buffer *bview;
114b8e80941Smrg   struct softpipe_resource *spr;
115b8e80941Smrg   unsigned width;
116b8e80941Smrg   unsigned char *data_ptr;
117b8e80941Smrg   int j, c;
118b8e80941Smrg   const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT);
119b8e80941Smrg
120b8e80941Smrg   if (params->unit >= PIPE_MAX_SHADER_BUFFERS)
121b8e80941Smrg      return;
122b8e80941Smrg
123b8e80941Smrg   bview = &sp_buf->sp_bview[params->unit];
124b8e80941Smrg   spr = softpipe_resource(bview->buffer);
125b8e80941Smrg   if (!spr)
126b8e80941Smrg      return;
127b8e80941Smrg
128b8e80941Smrg   if (!get_dimensions(bview, spr, &width))
129b8e80941Smrg      return;
130b8e80941Smrg
131b8e80941Smrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
132b8e80941Smrg      int s_coord;
133b8e80941Smrg
134b8e80941Smrg      if (!(params->execmask & (1 << j)))
135b8e80941Smrg         continue;
136b8e80941Smrg
137b8e80941Smrg      s_coord = s[j];
138b8e80941Smrg      if (s_coord >= width)
139b8e80941Smrg         continue;
140b8e80941Smrg
141b8e80941Smrg      data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord;
142b8e80941Smrg
143b8e80941Smrg      for (c = 0; c < 4; c++) {
144b8e80941Smrg         if (params->writemask & (1 << c)) {
145b8e80941Smrg            unsigned temp[4];
146b8e80941Smrg            unsigned char *dptr = data_ptr + (c * 4);
147b8e80941Smrg            temp[0] = ((uint32_t *)rgba[c])[j];
148b8e80941Smrg            format_desc->pack_rgba_uint(dptr, 0, temp, 0, 1, 1);
149b8e80941Smrg         }
150b8e80941Smrg      }
151b8e80941Smrg   }
152b8e80941Smrg}
153b8e80941Smrg
154b8e80941Smrg/*
155b8e80941Smrg * Implement atomic operations on unsigned integers.
156b8e80941Smrg */
157b8e80941Smrgstatic void
158b8e80941Smrghandle_op_atomic(const struct pipe_shader_buffer *bview,
159b8e80941Smrg                 bool just_read,
160b8e80941Smrg                 unsigned char *data_ptr,
161b8e80941Smrg                 uint qi,
162b8e80941Smrg                 enum tgsi_opcode opcode,
163b8e80941Smrg                 unsigned writemask,
164b8e80941Smrg                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
165b8e80941Smrg                 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
166b8e80941Smrg{
167b8e80941Smrg   uint c;
168b8e80941Smrg   const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT);
169b8e80941Smrg   unsigned sdata[4];
170b8e80941Smrg
171b8e80941Smrg   for (c = 0; c < 4; c++) {
172b8e80941Smrg      unsigned temp[4];
173b8e80941Smrg      unsigned char *dptr = data_ptr + (c * 4);
174b8e80941Smrg      format_desc->fetch_rgba_uint(temp, dptr, 0, 0);
175b8e80941Smrg      sdata[c] = temp[0];
176b8e80941Smrg   }
177b8e80941Smrg
178b8e80941Smrg   if (just_read) {
179b8e80941Smrg      for (c = 0; c < 4; c++) {
180b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = sdata[c];
181b8e80941Smrg      }
182b8e80941Smrg      return;
183b8e80941Smrg   }
184b8e80941Smrg
185b8e80941Smrg   switch (opcode) {
186b8e80941Smrg   case TGSI_OPCODE_ATOMUADD:
187b8e80941Smrg      for (c = 0; c < 4; c++) {
188b8e80941Smrg         unsigned temp = sdata[c];
189b8e80941Smrg         sdata[c] += ((uint32_t *)rgba[c])[qi];
190b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
191b8e80941Smrg      }
192b8e80941Smrg      break;
193b8e80941Smrg   case TGSI_OPCODE_ATOMXCHG:
194b8e80941Smrg      for (c = 0; c < 4; c++) {
195b8e80941Smrg         unsigned temp = sdata[c];
196b8e80941Smrg         sdata[c] = ((uint32_t *)rgba[c])[qi];
197b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
198b8e80941Smrg      }
199b8e80941Smrg      break;
200b8e80941Smrg   case TGSI_OPCODE_ATOMCAS:
201b8e80941Smrg      for (c = 0; c < 4; c++) {
202b8e80941Smrg         unsigned dst_x = sdata[c];
203b8e80941Smrg         unsigned cmp_x = ((uint32_t *)rgba[c])[qi];
204b8e80941Smrg         unsigned src_x = ((uint32_t *)rgba2[c])[qi];
205b8e80941Smrg         unsigned temp = sdata[c];
206b8e80941Smrg         sdata[c] = (dst_x == cmp_x) ? src_x : dst_x;
207b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
208b8e80941Smrg      }
209b8e80941Smrg      break;
210b8e80941Smrg   case TGSI_OPCODE_ATOMAND:
211b8e80941Smrg      for (c = 0; c < 4; c++) {
212b8e80941Smrg         unsigned temp = sdata[c];
213b8e80941Smrg         sdata[c] &= ((uint32_t *)rgba[c])[qi];
214b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
215b8e80941Smrg      }
216b8e80941Smrg      break;
217b8e80941Smrg   case TGSI_OPCODE_ATOMOR:
218b8e80941Smrg      for (c = 0; c < 4; c++) {
219b8e80941Smrg         unsigned temp = sdata[c];
220b8e80941Smrg         sdata[c] |= ((uint32_t *)rgba[c])[qi];
221b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
222b8e80941Smrg      }
223b8e80941Smrg      break;
224b8e80941Smrg   case TGSI_OPCODE_ATOMXOR:
225b8e80941Smrg      for (c = 0; c < 4; c++) {
226b8e80941Smrg         unsigned temp = sdata[c];
227b8e80941Smrg         sdata[c] ^= ((uint32_t *)rgba[c])[qi];
228b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = temp;
229b8e80941Smrg      }
230b8e80941Smrg      break;
231b8e80941Smrg   case TGSI_OPCODE_ATOMUMIN:
232b8e80941Smrg      for (c = 0; c < 4; c++) {
233b8e80941Smrg         unsigned dst_x = sdata[c];
234b8e80941Smrg         unsigned src_x = ((uint32_t *)rgba[c])[qi];
235b8e80941Smrg         sdata[c] = MIN2(dst_x, src_x);
236b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = dst_x;
237b8e80941Smrg      }
238b8e80941Smrg      break;
239b8e80941Smrg   case TGSI_OPCODE_ATOMUMAX:
240b8e80941Smrg      for (c = 0; c < 4; c++) {
241b8e80941Smrg         unsigned dst_x = sdata[c];
242b8e80941Smrg         unsigned src_x = ((uint32_t *)rgba[c])[qi];
243b8e80941Smrg         sdata[c] = MAX2(dst_x, src_x);
244b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = dst_x;
245b8e80941Smrg      }
246b8e80941Smrg      break;
247b8e80941Smrg   case TGSI_OPCODE_ATOMIMIN:
248b8e80941Smrg      for (c = 0; c < 4; c++) {
249b8e80941Smrg         int dst_x = sdata[c];
250b8e80941Smrg         int src_x = ((uint32_t *)rgba[c])[qi];
251b8e80941Smrg         sdata[c] = MIN2(dst_x, src_x);
252b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = dst_x;
253b8e80941Smrg      }
254b8e80941Smrg      break;
255b8e80941Smrg   case TGSI_OPCODE_ATOMIMAX:
256b8e80941Smrg      for (c = 0; c < 4; c++) {
257b8e80941Smrg         int dst_x = sdata[c];
258b8e80941Smrg         int src_x = ((uint32_t *)rgba[c])[qi];
259b8e80941Smrg         sdata[c] = MAX2(dst_x, src_x);
260b8e80941Smrg         ((uint32_t *)rgba[c])[qi] = dst_x;
261b8e80941Smrg      }
262b8e80941Smrg      break;
263b8e80941Smrg   case TGSI_OPCODE_ATOMFADD:
264b8e80941Smrg      for (c = 0; c < 4; c++) {
265b8e80941Smrg         float temp = uif(sdata[c]);
266b8e80941Smrg         sdata[c] = fui(temp + rgba[c][qi]);
267b8e80941Smrg         rgba[c][qi] = temp;
268b8e80941Smrg      }
269b8e80941Smrg      break;
270b8e80941Smrg   default:
271b8e80941Smrg      assert(!"Unexpected TGSI opcode in sp_tgsi_op");
272b8e80941Smrg      break;
273b8e80941Smrg   }
274b8e80941Smrg
275b8e80941Smrg   for (c = 0; c < 4; c++) {
276b8e80941Smrg      if (writemask & (1 << c)) {
277b8e80941Smrg         unsigned temp[4];
278b8e80941Smrg         unsigned char *dptr = data_ptr + (c * 4);
279b8e80941Smrg         temp[0] = sdata[c];
280b8e80941Smrg         format_desc->pack_rgba_uint(dptr, 0, temp, 0, 1, 1);
281b8e80941Smrg      }
282b8e80941Smrg   }
283b8e80941Smrg}
284b8e80941Smrg
285b8e80941Smrg/*
286b8e80941Smrg * Implement atomic buffer operations.
287b8e80941Smrg */
288b8e80941Smrgstatic void
289b8e80941Smrgsp_tgsi_op(const struct tgsi_buffer *buffer,
290b8e80941Smrg           const struct tgsi_buffer_params *params,
291b8e80941Smrg           enum tgsi_opcode opcode,
292b8e80941Smrg           const int s[TGSI_QUAD_SIZE],
293b8e80941Smrg           float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
294b8e80941Smrg           float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
295b8e80941Smrg{
296b8e80941Smrg   struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
297b8e80941Smrg   struct pipe_shader_buffer *bview;
298b8e80941Smrg   struct softpipe_resource *spr;
299b8e80941Smrg   unsigned width;
300b8e80941Smrg   int j, c;
301b8e80941Smrg   unsigned char *data_ptr;
302b8e80941Smrg
303b8e80941Smrg   if (params->unit >= PIPE_MAX_SHADER_BUFFERS)
304b8e80941Smrg      return;
305b8e80941Smrg
306b8e80941Smrg   bview = &sp_buf->sp_bview[params->unit];
307b8e80941Smrg   spr = softpipe_resource(bview->buffer);
308b8e80941Smrg   if (!spr)
309b8e80941Smrg      goto fail_write_all_zero;
310b8e80941Smrg
311b8e80941Smrg   if (!get_dimensions(bview, spr, &width))
312b8e80941Smrg      goto fail_write_all_zero;
313b8e80941Smrg
314b8e80941Smrg   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
315b8e80941Smrg      int s_coord;
316b8e80941Smrg      bool just_read = false;
317b8e80941Smrg
318b8e80941Smrg      s_coord = s[j];
319b8e80941Smrg      if (s_coord >= width) {
320b8e80941Smrg         for (c = 0; c < 4; c++) {
321b8e80941Smrg            rgba[c][j] = 0;
322b8e80941Smrg         }
323b8e80941Smrg         continue;
324b8e80941Smrg      }
325b8e80941Smrg
326b8e80941Smrg      /* just readback the value for atomic if execmask isn't set */
327b8e80941Smrg      if (!(params->execmask & (1 << j))) {
328b8e80941Smrg         just_read = true;
329b8e80941Smrg      }
330b8e80941Smrg
331b8e80941Smrg      data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord;
332b8e80941Smrg      /* we should see atomic operations on r32 formats */
333b8e80941Smrg
334b8e80941Smrg      handle_op_atomic(bview, just_read, data_ptr, j,
335b8e80941Smrg                       opcode, params->writemask, rgba, rgba2);
336b8e80941Smrg   }
337b8e80941Smrg   return;
338b8e80941Smrgfail_write_all_zero:
339b8e80941Smrg   memset(rgba, 0, TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE * 4);
340b8e80941Smrg   return;
341b8e80941Smrg}
342b8e80941Smrg
343b8e80941Smrg/*
344b8e80941Smrg * return size of the attached buffer for RESQ opcode.
345b8e80941Smrg */
346b8e80941Smrgstatic void
347b8e80941Smrgsp_tgsi_get_dims(const struct tgsi_buffer *buffer,
348b8e80941Smrg                 const struct tgsi_buffer_params *params,
349b8e80941Smrg                 int *dim)
350b8e80941Smrg{
351b8e80941Smrg   struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
352b8e80941Smrg   struct pipe_shader_buffer *bview;
353b8e80941Smrg   struct softpipe_resource *spr;
354b8e80941Smrg
355b8e80941Smrg   if (params->unit >= PIPE_MAX_SHADER_BUFFERS)
356b8e80941Smrg      return;
357b8e80941Smrg
358b8e80941Smrg   bview = &sp_buf->sp_bview[params->unit];
359b8e80941Smrg   spr = softpipe_resource(bview->buffer);
360b8e80941Smrg   if (!spr)
361b8e80941Smrg      return;
362b8e80941Smrg
363b8e80941Smrg   *dim = bview->buffer_size;
364b8e80941Smrg}
365b8e80941Smrg
366b8e80941Smrgstruct sp_tgsi_buffer *
367b8e80941Smrgsp_create_tgsi_buffer(void)
368b8e80941Smrg{
369b8e80941Smrg   struct sp_tgsi_buffer *buf = CALLOC_STRUCT(sp_tgsi_buffer);
370b8e80941Smrg   if (!buf)
371b8e80941Smrg      return NULL;
372b8e80941Smrg
373b8e80941Smrg   buf->base.load = sp_tgsi_load;
374b8e80941Smrg   buf->base.store = sp_tgsi_store;
375b8e80941Smrg   buf->base.op = sp_tgsi_op;
376b8e80941Smrg   buf->base.get_dims = sp_tgsi_get_dims;
377b8e80941Smrg   return buf;
378b8e80941Smrg};
379