/*
 * Copyright 2016 Red Hat.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sp_context.h"
#include "sp_image.h"
#include "sp_texture.h"

#include "util/u_format.h"

/*
 * Get the offset into the base image
 * first element for a buffer or layer/level for texture.
33b8e80941Smrg */ 34b8e80941Smrgstatic uint32_t 35b8e80941Smrgget_image_offset(const struct softpipe_resource *spr, 36b8e80941Smrg const struct pipe_image_view *iview, 37b8e80941Smrg enum pipe_format format, unsigned r_coord) 38b8e80941Smrg{ 39b8e80941Smrg int base_layer = 0; 40b8e80941Smrg 41b8e80941Smrg if (spr->base.target == PIPE_BUFFER) 42b8e80941Smrg return iview->u.buf.offset; 43b8e80941Smrg 44b8e80941Smrg if (spr->base.target == PIPE_TEXTURE_1D_ARRAY || 45b8e80941Smrg spr->base.target == PIPE_TEXTURE_2D_ARRAY || 46b8e80941Smrg spr->base.target == PIPE_TEXTURE_CUBE_ARRAY || 47b8e80941Smrg spr->base.target == PIPE_TEXTURE_CUBE || 48b8e80941Smrg spr->base.target == PIPE_TEXTURE_3D) 49b8e80941Smrg base_layer = r_coord + iview->u.tex.first_layer; 50b8e80941Smrg return softpipe_get_tex_image_offset(spr, iview->u.tex.level, base_layer); 51b8e80941Smrg} 52b8e80941Smrg 53b8e80941Smrg/* 54b8e80941Smrg * Does this texture instruction have a layer or depth parameter. 55b8e80941Smrg */ 56b8e80941Smrgstatic inline bool 57b8e80941Smrghas_layer_or_depth(unsigned tgsi_tex_instr) 58b8e80941Smrg{ 59b8e80941Smrg return (tgsi_tex_instr == TGSI_TEXTURE_3D || 60b8e80941Smrg tgsi_tex_instr == TGSI_TEXTURE_CUBE || 61b8e80941Smrg tgsi_tex_instr == TGSI_TEXTURE_1D_ARRAY || 62b8e80941Smrg tgsi_tex_instr == TGSI_TEXTURE_2D_ARRAY || 63b8e80941Smrg tgsi_tex_instr == TGSI_TEXTURE_CUBE_ARRAY || 64b8e80941Smrg tgsi_tex_instr == TGSI_TEXTURE_2D_ARRAY_MSAA); 65b8e80941Smrg} 66b8e80941Smrg 67b8e80941Smrg/* 68b8e80941Smrg * Is this texture instruction a single non-array coordinate. 
69b8e80941Smrg */ 70b8e80941Smrgstatic inline bool 71b8e80941Smrghas_1coord(unsigned tgsi_tex_instr) 72b8e80941Smrg{ 73b8e80941Smrg return (tgsi_tex_instr == TGSI_TEXTURE_BUFFER || 74b8e80941Smrg tgsi_tex_instr == TGSI_TEXTURE_1D || 75b8e80941Smrg tgsi_tex_instr == TGSI_TEXTURE_1D_ARRAY); 76b8e80941Smrg} 77b8e80941Smrg 78b8e80941Smrg/* 79b8e80941Smrg * check the bounds vs w/h/d 80b8e80941Smrg */ 81b8e80941Smrgstatic inline bool 82b8e80941Smrgbounds_check(int width, int height, int depth, 83b8e80941Smrg int s, int t, int r) 84b8e80941Smrg{ 85b8e80941Smrg if (s < 0 || s >= width) 86b8e80941Smrg return false; 87b8e80941Smrg if (t < 0 || t >= height) 88b8e80941Smrg return false; 89b8e80941Smrg if (r < 0 || r >= depth) 90b8e80941Smrg return false; 91b8e80941Smrg return true; 92b8e80941Smrg} 93b8e80941Smrg 94b8e80941Smrg/* 95b8e80941Smrg * Checks if the texture target compatible with the image resource 96b8e80941Smrg * pipe target. 97b8e80941Smrg */ 98b8e80941Smrgstatic inline bool 99b8e80941Smrghas_compat_target(unsigned pipe_target, unsigned tgsi_target) 100b8e80941Smrg{ 101b8e80941Smrg switch (pipe_target) { 102b8e80941Smrg case PIPE_TEXTURE_1D: 103b8e80941Smrg if (tgsi_target == TGSI_TEXTURE_1D) 104b8e80941Smrg return true; 105b8e80941Smrg break; 106b8e80941Smrg case PIPE_TEXTURE_2D: 107b8e80941Smrg if (tgsi_target == TGSI_TEXTURE_2D) 108b8e80941Smrg return true; 109b8e80941Smrg break; 110b8e80941Smrg case PIPE_TEXTURE_RECT: 111b8e80941Smrg if (tgsi_target == TGSI_TEXTURE_RECT) 112b8e80941Smrg return true; 113b8e80941Smrg break; 114b8e80941Smrg case PIPE_TEXTURE_3D: 115b8e80941Smrg if (tgsi_target == TGSI_TEXTURE_3D || 116b8e80941Smrg tgsi_target == TGSI_TEXTURE_2D) 117b8e80941Smrg return true; 118b8e80941Smrg break; 119b8e80941Smrg case PIPE_TEXTURE_CUBE: 120b8e80941Smrg if (tgsi_target == TGSI_TEXTURE_CUBE || 121b8e80941Smrg tgsi_target == TGSI_TEXTURE_2D) 122b8e80941Smrg return true; 123b8e80941Smrg break; 124b8e80941Smrg case PIPE_TEXTURE_1D_ARRAY: 
125b8e80941Smrg if (tgsi_target == TGSI_TEXTURE_1D || 126b8e80941Smrg tgsi_target == TGSI_TEXTURE_1D_ARRAY) 127b8e80941Smrg return true; 128b8e80941Smrg break; 129b8e80941Smrg case PIPE_TEXTURE_2D_ARRAY: 130b8e80941Smrg if (tgsi_target == TGSI_TEXTURE_2D || 131b8e80941Smrg tgsi_target == TGSI_TEXTURE_2D_ARRAY) 132b8e80941Smrg return true; 133b8e80941Smrg break; 134b8e80941Smrg case PIPE_TEXTURE_CUBE_ARRAY: 135b8e80941Smrg if (tgsi_target == TGSI_TEXTURE_CUBE || 136b8e80941Smrg tgsi_target == TGSI_TEXTURE_CUBE_ARRAY || 137b8e80941Smrg tgsi_target == TGSI_TEXTURE_2D) 138b8e80941Smrg return true; 139b8e80941Smrg break; 140b8e80941Smrg case PIPE_BUFFER: 141b8e80941Smrg return (tgsi_target == TGSI_TEXTURE_BUFFER); 142b8e80941Smrg } 143b8e80941Smrg return false; 144b8e80941Smrg} 145b8e80941Smrg 146b8e80941Smrgstatic bool 147b8e80941Smrgget_dimensions(const struct pipe_image_view *iview, 148b8e80941Smrg const struct softpipe_resource *spr, 149b8e80941Smrg unsigned tgsi_tex_instr, 150b8e80941Smrg enum pipe_format pformat, 151b8e80941Smrg unsigned *width, 152b8e80941Smrg unsigned *height, 153b8e80941Smrg unsigned *depth) 154b8e80941Smrg{ 155b8e80941Smrg if (tgsi_tex_instr == TGSI_TEXTURE_BUFFER) { 156b8e80941Smrg *width = iview->u.buf.size / util_format_get_blocksize(pformat); 157b8e80941Smrg *height = 1; 158b8e80941Smrg *depth = 1; 159b8e80941Smrg /* 160b8e80941Smrg * Bounds check the buffer size from the view 161b8e80941Smrg * and the buffer size from the underlying buffer. 162b8e80941Smrg */ 163b8e80941Smrg if (util_format_get_stride(pformat, *width) > 164b8e80941Smrg util_format_get_stride(spr->base.format, spr->base.width0)) 165b8e80941Smrg return false; 166b8e80941Smrg } else { 167b8e80941Smrg unsigned level; 168b8e80941Smrg 169b8e80941Smrg level = spr->base.target == PIPE_BUFFER ? 
0 : iview->u.tex.level; 170b8e80941Smrg *width = u_minify(spr->base.width0, level); 171b8e80941Smrg *height = u_minify(spr->base.height0, level); 172b8e80941Smrg 173b8e80941Smrg if (spr->base.target == PIPE_TEXTURE_3D) 174b8e80941Smrg *depth = u_minify(spr->base.depth0, level); 175b8e80941Smrg else 176b8e80941Smrg *depth = spr->base.array_size; 177b8e80941Smrg 178b8e80941Smrg /* Make sure the resource and view have compatiable formats */ 179b8e80941Smrg if (util_format_get_blocksize(pformat) > 180b8e80941Smrg util_format_get_blocksize(spr->base.format)) 181b8e80941Smrg return false; 182b8e80941Smrg } 183b8e80941Smrg return true; 184b8e80941Smrg} 185b8e80941Smrg 186b8e80941Smrgstatic void 187b8e80941Smrgfill_coords(const struct tgsi_image_params *params, 188b8e80941Smrg unsigned index, 189b8e80941Smrg const int s[TGSI_QUAD_SIZE], 190b8e80941Smrg const int t[TGSI_QUAD_SIZE], 191b8e80941Smrg const int r[TGSI_QUAD_SIZE], 192b8e80941Smrg int *s_coord, int *t_coord, int *r_coord) 193b8e80941Smrg{ 194b8e80941Smrg *s_coord = s[index]; 195b8e80941Smrg *t_coord = has_1coord(params->tgsi_tex_instr) ? 0 : t[index]; 196b8e80941Smrg *r_coord = has_layer_or_depth(params->tgsi_tex_instr) ? 197b8e80941Smrg (params->tgsi_tex_instr == TGSI_TEXTURE_1D_ARRAY ? t[index] : r[index]) : 0; 198b8e80941Smrg} 199b8e80941Smrg/* 200b8e80941Smrg * Implement the image LOAD operation. 
201b8e80941Smrg */ 202b8e80941Smrgstatic void 203b8e80941Smrgsp_tgsi_load(const struct tgsi_image *image, 204b8e80941Smrg const struct tgsi_image_params *params, 205b8e80941Smrg const int s[TGSI_QUAD_SIZE], 206b8e80941Smrg const int t[TGSI_QUAD_SIZE], 207b8e80941Smrg const int r[TGSI_QUAD_SIZE], 208b8e80941Smrg const int sample[TGSI_QUAD_SIZE], 209b8e80941Smrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) 210b8e80941Smrg{ 211b8e80941Smrg struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image; 212b8e80941Smrg struct pipe_image_view *iview; 213b8e80941Smrg struct softpipe_resource *spr; 214b8e80941Smrg unsigned width, height, depth; 215b8e80941Smrg unsigned stride; 216b8e80941Smrg int c, j; 217b8e80941Smrg char *data_ptr; 218b8e80941Smrg unsigned offset = 0; 219b8e80941Smrg 220b8e80941Smrg if (params->unit >= PIPE_MAX_SHADER_IMAGES) 221b8e80941Smrg goto fail_write_all_zero; 222b8e80941Smrg iview = &sp_img->sp_iview[params->unit]; 223b8e80941Smrg spr = (struct softpipe_resource *)iview->resource; 224b8e80941Smrg if (!spr) 225b8e80941Smrg goto fail_write_all_zero; 226b8e80941Smrg 227b8e80941Smrg if (!has_compat_target(spr->base.target, params->tgsi_tex_instr)) 228b8e80941Smrg goto fail_write_all_zero; 229b8e80941Smrg 230b8e80941Smrg if (!get_dimensions(iview, spr, params->tgsi_tex_instr, 231b8e80941Smrg params->format, &width, &height, &depth)) 232b8e80941Smrg return; 233b8e80941Smrg 234b8e80941Smrg stride = util_format_get_stride(params->format, width); 235b8e80941Smrg 236b8e80941Smrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 237b8e80941Smrg int s_coord, t_coord, r_coord; 238b8e80941Smrg bool fill_zero = false; 239b8e80941Smrg 240b8e80941Smrg if (!(params->execmask & (1 << j))) 241b8e80941Smrg fill_zero = true; 242b8e80941Smrg 243b8e80941Smrg fill_coords(params, j, s, t, r, &s_coord, &t_coord, &r_coord); 244b8e80941Smrg if (!bounds_check(width, height, depth, 245b8e80941Smrg s_coord, t_coord, r_coord)) 246b8e80941Smrg fill_zero = true; 247b8e80941Smrg 
248b8e80941Smrg if (fill_zero) { 249b8e80941Smrg int nc = util_format_get_nr_components(params->format); 250b8e80941Smrg int ival = util_format_is_pure_integer(params->format); 251b8e80941Smrg for (c = 0; c < 4; c++) { 252b8e80941Smrg rgba[c][j] = 0; 253b8e80941Smrg if (c == 3 && nc < 4) { 254b8e80941Smrg if (ival) 255b8e80941Smrg ((int32_t *)rgba[c])[j] = 1; 256b8e80941Smrg else 257b8e80941Smrg rgba[c][j] = 1.0; 258b8e80941Smrg } 259b8e80941Smrg } 260b8e80941Smrg continue; 261b8e80941Smrg } 262b8e80941Smrg offset = get_image_offset(spr, iview, params->format, r_coord); 263b8e80941Smrg data_ptr = (char *)spr->data + offset; 264b8e80941Smrg 265b8e80941Smrg if (util_format_is_pure_sint(params->format)) { 266b8e80941Smrg int32_t sdata[4]; 267b8e80941Smrg 268b8e80941Smrg util_format_read_4i(params->format, 269b8e80941Smrg sdata, 0, 270b8e80941Smrg data_ptr, stride, 271b8e80941Smrg s_coord, t_coord, 1, 1); 272b8e80941Smrg for (c = 0; c < 4; c++) 273b8e80941Smrg ((int32_t *)rgba[c])[j] = sdata[c]; 274b8e80941Smrg } else if (util_format_is_pure_uint(params->format)) { 275b8e80941Smrg uint32_t sdata[4]; 276b8e80941Smrg util_format_read_4ui(params->format, 277b8e80941Smrg sdata, 0, 278b8e80941Smrg data_ptr, stride, 279b8e80941Smrg s_coord, t_coord, 1, 1); 280b8e80941Smrg for (c = 0; c < 4; c++) 281b8e80941Smrg ((uint32_t *)rgba[c])[j] = sdata[c]; 282b8e80941Smrg } else { 283b8e80941Smrg float sdata[4]; 284b8e80941Smrg util_format_read_4f(params->format, 285b8e80941Smrg sdata, 0, 286b8e80941Smrg data_ptr, stride, 287b8e80941Smrg s_coord, t_coord, 1, 1); 288b8e80941Smrg for (c = 0; c < 4; c++) 289b8e80941Smrg rgba[c][j] = sdata[c]; 290b8e80941Smrg } 291b8e80941Smrg } 292b8e80941Smrg return; 293b8e80941Smrgfail_write_all_zero: 294b8e80941Smrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 295b8e80941Smrg for (c = 0; c < 4; c++) 296b8e80941Smrg rgba[c][j] = 0; 297b8e80941Smrg } 298b8e80941Smrg return; 299b8e80941Smrg} 300b8e80941Smrg 301b8e80941Smrg/* 302b8e80941Smrg * Implement the 
image STORE operation. 303b8e80941Smrg */ 304b8e80941Smrgstatic void 305b8e80941Smrgsp_tgsi_store(const struct tgsi_image *image, 306b8e80941Smrg const struct tgsi_image_params *params, 307b8e80941Smrg const int s[TGSI_QUAD_SIZE], 308b8e80941Smrg const int t[TGSI_QUAD_SIZE], 309b8e80941Smrg const int r[TGSI_QUAD_SIZE], 310b8e80941Smrg const int sample[TGSI_QUAD_SIZE], 311b8e80941Smrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) 312b8e80941Smrg{ 313b8e80941Smrg struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image; 314b8e80941Smrg struct pipe_image_view *iview; 315b8e80941Smrg struct softpipe_resource *spr; 316b8e80941Smrg unsigned width, height, depth; 317b8e80941Smrg unsigned stride; 318b8e80941Smrg char *data_ptr; 319b8e80941Smrg int j, c; 320b8e80941Smrg unsigned offset = 0; 321b8e80941Smrg unsigned pformat = params->format; 322b8e80941Smrg 323b8e80941Smrg if (params->unit >= PIPE_MAX_SHADER_IMAGES) 324b8e80941Smrg return; 325b8e80941Smrg iview = &sp_img->sp_iview[params->unit]; 326b8e80941Smrg spr = (struct softpipe_resource *)iview->resource; 327b8e80941Smrg if (!spr) 328b8e80941Smrg return; 329b8e80941Smrg if (!has_compat_target(spr->base.target, params->tgsi_tex_instr)) 330b8e80941Smrg return; 331b8e80941Smrg 332b8e80941Smrg if (params->format == PIPE_FORMAT_NONE) 333b8e80941Smrg pformat = spr->base.format; 334b8e80941Smrg 335b8e80941Smrg if (!get_dimensions(iview, spr, params->tgsi_tex_instr, 336b8e80941Smrg pformat, &width, &height, &depth)) 337b8e80941Smrg return; 338b8e80941Smrg 339b8e80941Smrg stride = util_format_get_stride(pformat, width); 340b8e80941Smrg 341b8e80941Smrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 342b8e80941Smrg int s_coord, t_coord, r_coord; 343b8e80941Smrg 344b8e80941Smrg if (!(params->execmask & (1 << j))) 345b8e80941Smrg continue; 346b8e80941Smrg 347b8e80941Smrg fill_coords(params, j, s, t, r, &s_coord, &t_coord, &r_coord); 348b8e80941Smrg if (!bounds_check(width, height, depth, 349b8e80941Smrg s_coord, t_coord, r_coord)) 
350b8e80941Smrg continue; 351b8e80941Smrg 352b8e80941Smrg offset = get_image_offset(spr, iview, pformat, r_coord); 353b8e80941Smrg data_ptr = (char *)spr->data + offset; 354b8e80941Smrg 355b8e80941Smrg if (util_format_is_pure_sint(pformat)) { 356b8e80941Smrg int32_t sdata[4]; 357b8e80941Smrg for (c = 0; c < 4; c++) 358b8e80941Smrg sdata[c] = ((int32_t *)rgba[c])[j]; 359b8e80941Smrg util_format_write_4i(pformat, sdata, 0, data_ptr, stride, 360b8e80941Smrg s_coord, t_coord, 1, 1); 361b8e80941Smrg } else if (util_format_is_pure_uint(pformat)) { 362b8e80941Smrg uint32_t sdata[4]; 363b8e80941Smrg for (c = 0; c < 4; c++) 364b8e80941Smrg sdata[c] = ((uint32_t *)rgba[c])[j]; 365b8e80941Smrg util_format_write_4ui(pformat, sdata, 0, data_ptr, stride, 366b8e80941Smrg s_coord, t_coord, 1, 1); 367b8e80941Smrg } else { 368b8e80941Smrg float sdata[4]; 369b8e80941Smrg for (c = 0; c < 4; c++) 370b8e80941Smrg sdata[c] = rgba[c][j]; 371b8e80941Smrg util_format_write_4f(pformat, sdata, 0, data_ptr, stride, 372b8e80941Smrg s_coord, t_coord, 1, 1); 373b8e80941Smrg } 374b8e80941Smrg } 375b8e80941Smrg} 376b8e80941Smrg 377b8e80941Smrg/* 378b8e80941Smrg * Implement atomic operations on unsigned integers. 
379b8e80941Smrg */ 380b8e80941Smrgstatic void 381b8e80941Smrghandle_op_uint(const struct pipe_image_view *iview, 382b8e80941Smrg const struct tgsi_image_params *params, 383b8e80941Smrg bool just_read, 384b8e80941Smrg char *data_ptr, 385b8e80941Smrg uint qi, 386b8e80941Smrg unsigned stride, 387b8e80941Smrg enum tgsi_opcode opcode, 388b8e80941Smrg int s, 389b8e80941Smrg int t, 390b8e80941Smrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], 391b8e80941Smrg float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) 392b8e80941Smrg{ 393b8e80941Smrg uint c; 394b8e80941Smrg int nc = util_format_get_nr_components(params->format); 395b8e80941Smrg unsigned sdata[4]; 396b8e80941Smrg 397b8e80941Smrg util_format_read_4ui(params->format, 398b8e80941Smrg sdata, 0, 399b8e80941Smrg data_ptr, stride, 400b8e80941Smrg s, t, 1, 1); 401b8e80941Smrg 402b8e80941Smrg if (just_read) { 403b8e80941Smrg for (c = 0; c < nc; c++) { 404b8e80941Smrg ((uint32_t *)rgba[c])[qi] = sdata[c]; 405b8e80941Smrg } 406b8e80941Smrg return; 407b8e80941Smrg } 408b8e80941Smrg switch (opcode) { 409b8e80941Smrg case TGSI_OPCODE_ATOMUADD: 410b8e80941Smrg for (c = 0; c < nc; c++) { 411b8e80941Smrg unsigned temp = sdata[c]; 412b8e80941Smrg sdata[c] += ((uint32_t *)rgba[c])[qi]; 413b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 414b8e80941Smrg } 415b8e80941Smrg break; 416b8e80941Smrg case TGSI_OPCODE_ATOMXCHG: 417b8e80941Smrg for (c = 0; c < nc; c++) { 418b8e80941Smrg unsigned temp = sdata[c]; 419b8e80941Smrg sdata[c] = ((uint32_t *)rgba[c])[qi]; 420b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 421b8e80941Smrg } 422b8e80941Smrg break; 423b8e80941Smrg case TGSI_OPCODE_ATOMCAS: 424b8e80941Smrg for (c = 0; c < nc; c++) { 425b8e80941Smrg unsigned dst_x = sdata[c]; 426b8e80941Smrg unsigned cmp_x = ((uint32_t *)rgba[c])[qi]; 427b8e80941Smrg unsigned src_x = ((uint32_t *)rgba2[c])[qi]; 428b8e80941Smrg unsigned temp = sdata[c]; 429b8e80941Smrg sdata[c] = (dst_x == cmp_x) ? 
src_x : dst_x; 430b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 431b8e80941Smrg } 432b8e80941Smrg break; 433b8e80941Smrg case TGSI_OPCODE_ATOMAND: 434b8e80941Smrg for (c = 0; c < nc; c++) { 435b8e80941Smrg unsigned temp = sdata[c]; 436b8e80941Smrg sdata[c] &= ((uint32_t *)rgba[c])[qi]; 437b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 438b8e80941Smrg } 439b8e80941Smrg break; 440b8e80941Smrg case TGSI_OPCODE_ATOMOR: 441b8e80941Smrg for (c = 0; c < nc; c++) { 442b8e80941Smrg unsigned temp = sdata[c]; 443b8e80941Smrg sdata[c] |= ((uint32_t *)rgba[c])[qi]; 444b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 445b8e80941Smrg } 446b8e80941Smrg break; 447b8e80941Smrg case TGSI_OPCODE_ATOMXOR: 448b8e80941Smrg for (c = 0; c < nc; c++) { 449b8e80941Smrg unsigned temp = sdata[c]; 450b8e80941Smrg sdata[c] ^= ((uint32_t *)rgba[c])[qi]; 451b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 452b8e80941Smrg } 453b8e80941Smrg break; 454b8e80941Smrg case TGSI_OPCODE_ATOMUMIN: 455b8e80941Smrg for (c = 0; c < nc; c++) { 456b8e80941Smrg unsigned dst_x = sdata[c]; 457b8e80941Smrg unsigned src_x = ((uint32_t *)rgba[c])[qi]; 458b8e80941Smrg sdata[c] = MIN2(dst_x, src_x); 459b8e80941Smrg ((uint32_t *)rgba[c])[qi] = dst_x; 460b8e80941Smrg } 461b8e80941Smrg break; 462b8e80941Smrg case TGSI_OPCODE_ATOMUMAX: 463b8e80941Smrg for (c = 0; c < nc; c++) { 464b8e80941Smrg unsigned dst_x = sdata[c]; 465b8e80941Smrg unsigned src_x = ((uint32_t *)rgba[c])[qi]; 466b8e80941Smrg sdata[c] = MAX2(dst_x, src_x); 467b8e80941Smrg ((uint32_t *)rgba[c])[qi] = dst_x; 468b8e80941Smrg } 469b8e80941Smrg break; 470b8e80941Smrg case TGSI_OPCODE_ATOMIMIN: 471b8e80941Smrg for (c = 0; c < nc; c++) { 472b8e80941Smrg int dst_x = sdata[c]; 473b8e80941Smrg int src_x = ((uint32_t *)rgba[c])[qi]; 474b8e80941Smrg sdata[c] = MIN2(dst_x, src_x); 475b8e80941Smrg ((uint32_t *)rgba[c])[qi] = dst_x; 476b8e80941Smrg } 477b8e80941Smrg break; 478b8e80941Smrg case TGSI_OPCODE_ATOMIMAX: 479b8e80941Smrg for (c = 0; c < nc; c++) { 480b8e80941Smrg 
int dst_x = sdata[c]; 481b8e80941Smrg int src_x = ((uint32_t *)rgba[c])[qi]; 482b8e80941Smrg sdata[c] = MAX2(dst_x, src_x); 483b8e80941Smrg ((uint32_t *)rgba[c])[qi] = dst_x; 484b8e80941Smrg } 485b8e80941Smrg break; 486b8e80941Smrg default: 487b8e80941Smrg assert(!"Unexpected TGSI opcode in sp_tgsi_op"); 488b8e80941Smrg break; 489b8e80941Smrg } 490b8e80941Smrg util_format_write_4ui(params->format, sdata, 0, data_ptr, stride, 491b8e80941Smrg s, t, 1, 1); 492b8e80941Smrg} 493b8e80941Smrg 494b8e80941Smrg/* 495b8e80941Smrg * Implement atomic operations on signed integers. 496b8e80941Smrg */ 497b8e80941Smrgstatic void 498b8e80941Smrghandle_op_int(const struct pipe_image_view *iview, 499b8e80941Smrg const struct tgsi_image_params *params, 500b8e80941Smrg bool just_read, 501b8e80941Smrg char *data_ptr, 502b8e80941Smrg uint qi, 503b8e80941Smrg unsigned stride, 504b8e80941Smrg enum tgsi_opcode opcode, 505b8e80941Smrg int s, 506b8e80941Smrg int t, 507b8e80941Smrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], 508b8e80941Smrg float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) 509b8e80941Smrg{ 510b8e80941Smrg uint c; 511b8e80941Smrg int nc = util_format_get_nr_components(params->format); 512b8e80941Smrg int sdata[4]; 513b8e80941Smrg util_format_read_4i(params->format, 514b8e80941Smrg sdata, 0, 515b8e80941Smrg data_ptr, stride, 516b8e80941Smrg s, t, 1, 1); 517b8e80941Smrg 518b8e80941Smrg if (just_read) { 519b8e80941Smrg for (c = 0; c < nc; c++) { 520b8e80941Smrg ((int32_t *)rgba[c])[qi] = sdata[c]; 521b8e80941Smrg } 522b8e80941Smrg return; 523b8e80941Smrg } 524b8e80941Smrg switch (opcode) { 525b8e80941Smrg case TGSI_OPCODE_ATOMUADD: 526b8e80941Smrg for (c = 0; c < nc; c++) { 527b8e80941Smrg int temp = sdata[c]; 528b8e80941Smrg sdata[c] += ((int32_t *)rgba[c])[qi]; 529b8e80941Smrg ((int32_t *)rgba[c])[qi] = temp; 530b8e80941Smrg } 531b8e80941Smrg break; 532b8e80941Smrg case TGSI_OPCODE_ATOMXCHG: 533b8e80941Smrg for (c = 0; c < nc; c++) { 534b8e80941Smrg int temp = sdata[c]; 
535b8e80941Smrg sdata[c] = ((int32_t *)rgba[c])[qi]; 536b8e80941Smrg ((int32_t *)rgba[c])[qi] = temp; 537b8e80941Smrg } 538b8e80941Smrg break; 539b8e80941Smrg case TGSI_OPCODE_ATOMCAS: 540b8e80941Smrg for (c = 0; c < nc; c++) { 541b8e80941Smrg int dst_x = sdata[c]; 542b8e80941Smrg int cmp_x = ((int32_t *)rgba[c])[qi]; 543b8e80941Smrg int src_x = ((int32_t *)rgba2[c])[qi]; 544b8e80941Smrg int temp = sdata[c]; 545b8e80941Smrg sdata[c] = (dst_x == cmp_x) ? src_x : dst_x; 546b8e80941Smrg ((int32_t *)rgba[c])[qi] = temp; 547b8e80941Smrg } 548b8e80941Smrg break; 549b8e80941Smrg case TGSI_OPCODE_ATOMAND: 550b8e80941Smrg for (c = 0; c < nc; c++) { 551b8e80941Smrg int temp = sdata[c]; 552b8e80941Smrg sdata[c] &= ((int32_t *)rgba[c])[qi]; 553b8e80941Smrg ((int32_t *)rgba[c])[qi] = temp; 554b8e80941Smrg } 555b8e80941Smrg break; 556b8e80941Smrg case TGSI_OPCODE_ATOMOR: 557b8e80941Smrg for (c = 0; c < nc; c++) { 558b8e80941Smrg int temp = sdata[c]; 559b8e80941Smrg sdata[c] |= ((int32_t *)rgba[c])[qi]; 560b8e80941Smrg ((int32_t *)rgba[c])[qi] = temp; 561b8e80941Smrg } 562b8e80941Smrg break; 563b8e80941Smrg case TGSI_OPCODE_ATOMXOR: 564b8e80941Smrg for (c = 0; c < nc; c++) { 565b8e80941Smrg int temp = sdata[c]; 566b8e80941Smrg sdata[c] ^= ((int32_t *)rgba[c])[qi]; 567b8e80941Smrg ((int32_t *)rgba[c])[qi] = temp; 568b8e80941Smrg } 569b8e80941Smrg break; 570b8e80941Smrg case TGSI_OPCODE_ATOMUMIN: 571b8e80941Smrg for (c = 0; c < nc; c++) { 572b8e80941Smrg int dst_x = sdata[c]; 573b8e80941Smrg int src_x = ((int32_t *)rgba[c])[qi]; 574b8e80941Smrg sdata[c] = MIN2(dst_x, src_x); 575b8e80941Smrg ((int32_t *)rgba[c])[qi] = dst_x; 576b8e80941Smrg } 577b8e80941Smrg break; 578b8e80941Smrg case TGSI_OPCODE_ATOMUMAX: 579b8e80941Smrg for (c = 0; c < nc; c++) { 580b8e80941Smrg int dst_x = sdata[c]; 581b8e80941Smrg int src_x = ((int32_t *)rgba[c])[qi]; 582b8e80941Smrg sdata[c] = MAX2(dst_x, src_x); 583b8e80941Smrg ((int32_t *)rgba[c])[qi] = dst_x; 584b8e80941Smrg } 585b8e80941Smrg break; 
586b8e80941Smrg case TGSI_OPCODE_ATOMIMIN: 587b8e80941Smrg for (c = 0; c < nc; c++) { 588b8e80941Smrg int dst_x = sdata[c]; 589b8e80941Smrg int src_x = ((int32_t *)rgba[c])[qi]; 590b8e80941Smrg sdata[c] = MIN2(dst_x, src_x); 591b8e80941Smrg ((int32_t *)rgba[c])[qi] = dst_x; 592b8e80941Smrg } 593b8e80941Smrg break; 594b8e80941Smrg case TGSI_OPCODE_ATOMIMAX: 595b8e80941Smrg for (c = 0; c < nc; c++) { 596b8e80941Smrg int dst_x = sdata[c]; 597b8e80941Smrg int src_x = ((int32_t *)rgba[c])[qi]; 598b8e80941Smrg sdata[c] = MAX2(dst_x, src_x); 599b8e80941Smrg ((int32_t *)rgba[c])[qi] = dst_x; 600b8e80941Smrg } 601b8e80941Smrg break; 602b8e80941Smrg default: 603b8e80941Smrg assert(!"Unexpected TGSI opcode in sp_tgsi_op"); 604b8e80941Smrg break; 605b8e80941Smrg } 606b8e80941Smrg util_format_write_4i(params->format, sdata, 0, data_ptr, stride, 607b8e80941Smrg s, t, 1, 1); 608b8e80941Smrg} 609b8e80941Smrg 610b8e80941Smrg/* GLES OES_shader_image_atomic.txt allows XCHG on R32F */ 611b8e80941Smrgstatic void 612b8e80941Smrghandle_op_r32f_xchg(const struct pipe_image_view *iview, 613b8e80941Smrg const struct tgsi_image_params *params, 614b8e80941Smrg bool just_read, 615b8e80941Smrg char *data_ptr, 616b8e80941Smrg uint qi, 617b8e80941Smrg unsigned stride, 618b8e80941Smrg enum tgsi_opcode opcode, 619b8e80941Smrg int s, 620b8e80941Smrg int t, 621b8e80941Smrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) 622b8e80941Smrg{ 623b8e80941Smrg float sdata[4]; 624b8e80941Smrg uint c; 625b8e80941Smrg int nc = 1; 626b8e80941Smrg util_format_read_4f(params->format, 627b8e80941Smrg sdata, 0, 628b8e80941Smrg data_ptr, stride, 629b8e80941Smrg s, t, 1, 1); 630b8e80941Smrg if (just_read) { 631b8e80941Smrg for (c = 0; c < nc; c++) { 632b8e80941Smrg ((int32_t *)rgba[c])[qi] = sdata[c]; 633b8e80941Smrg } 634b8e80941Smrg return; 635b8e80941Smrg } 636b8e80941Smrg 637b8e80941Smrg for (c = 0; c < nc; c++) { 638b8e80941Smrg int temp = sdata[c]; 639b8e80941Smrg sdata[c] = ((float *)rgba[c])[qi]; 
640b8e80941Smrg ((float *)rgba[c])[qi] = temp; 641b8e80941Smrg } 642b8e80941Smrg util_format_write_4f(params->format, sdata, 0, data_ptr, stride, 643b8e80941Smrg s, t, 1, 1); 644b8e80941Smrg} 645b8e80941Smrg 646b8e80941Smrg/* 647b8e80941Smrg * Implement atomic image operations. 648b8e80941Smrg */ 649b8e80941Smrgstatic void 650b8e80941Smrgsp_tgsi_op(const struct tgsi_image *image, 651b8e80941Smrg const struct tgsi_image_params *params, 652b8e80941Smrg enum tgsi_opcode opcode, 653b8e80941Smrg const int s[TGSI_QUAD_SIZE], 654b8e80941Smrg const int t[TGSI_QUAD_SIZE], 655b8e80941Smrg const int r[TGSI_QUAD_SIZE], 656b8e80941Smrg const int sample[TGSI_QUAD_SIZE], 657b8e80941Smrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], 658b8e80941Smrg float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) 659b8e80941Smrg{ 660b8e80941Smrg struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image; 661b8e80941Smrg struct pipe_image_view *iview; 662b8e80941Smrg struct softpipe_resource *spr; 663b8e80941Smrg unsigned width, height, depth; 664b8e80941Smrg unsigned stride; 665b8e80941Smrg int j, c; 666b8e80941Smrg unsigned offset; 667b8e80941Smrg char *data_ptr; 668b8e80941Smrg 669b8e80941Smrg if (params->unit >= PIPE_MAX_SHADER_IMAGES) 670b8e80941Smrg return; 671b8e80941Smrg iview = &sp_img->sp_iview[params->unit]; 672b8e80941Smrg spr = (struct softpipe_resource *)iview->resource; 673b8e80941Smrg if (!spr) 674b8e80941Smrg goto fail_write_all_zero; 675b8e80941Smrg if (!has_compat_target(spr->base.target, params->tgsi_tex_instr)) 676b8e80941Smrg goto fail_write_all_zero; 677b8e80941Smrg 678b8e80941Smrg if (!get_dimensions(iview, spr, params->tgsi_tex_instr, 679b8e80941Smrg params->format, &width, &height, &depth)) 680b8e80941Smrg goto fail_write_all_zero; 681b8e80941Smrg 682b8e80941Smrg stride = util_format_get_stride(spr->base.format, width); 683b8e80941Smrg 684b8e80941Smrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 685b8e80941Smrg int s_coord, t_coord, r_coord; 686b8e80941Smrg bool just_read = 
false; 687b8e80941Smrg 688b8e80941Smrg fill_coords(params, j, s, t, r, &s_coord, &t_coord, &r_coord); 689b8e80941Smrg if (!bounds_check(width, height, depth, 690b8e80941Smrg s_coord, t_coord, r_coord)) { 691b8e80941Smrg int nc = util_format_get_nr_components(params->format); 692b8e80941Smrg int ival = util_format_is_pure_integer(params->format); 693b8e80941Smrg int c; 694b8e80941Smrg for (c = 0; c < 4; c++) { 695b8e80941Smrg rgba[c][j] = 0; 696b8e80941Smrg if (c == 3 && nc < 4) { 697b8e80941Smrg if (ival) 698b8e80941Smrg ((int32_t *)rgba[c])[j] = 1; 699b8e80941Smrg else 700b8e80941Smrg rgba[c][j] = 1.0; 701b8e80941Smrg } 702b8e80941Smrg } 703b8e80941Smrg continue; 704b8e80941Smrg } 705b8e80941Smrg 706b8e80941Smrg /* just readback the value for atomic if execmask isn't set */ 707b8e80941Smrg if (!(params->execmask & (1 << j))) { 708b8e80941Smrg just_read = true; 709b8e80941Smrg } 710b8e80941Smrg 711b8e80941Smrg offset = get_image_offset(spr, iview, params->format, r_coord); 712b8e80941Smrg data_ptr = (char *)spr->data + offset; 713b8e80941Smrg 714b8e80941Smrg /* we should see atomic operations on r32 formats */ 715b8e80941Smrg if (util_format_is_pure_uint(params->format)) 716b8e80941Smrg handle_op_uint(iview, params, just_read, data_ptr, j, stride, 717b8e80941Smrg opcode, s_coord, t_coord, rgba, rgba2); 718b8e80941Smrg else if (util_format_is_pure_sint(params->format)) 719b8e80941Smrg handle_op_int(iview, params, just_read, data_ptr, j, stride, 720b8e80941Smrg opcode, s_coord, t_coord, rgba, rgba2); 721b8e80941Smrg else if (params->format == PIPE_FORMAT_R32_FLOAT && 722b8e80941Smrg opcode == TGSI_OPCODE_ATOMXCHG) 723b8e80941Smrg handle_op_r32f_xchg(iview, params, just_read, data_ptr, j, stride, 724b8e80941Smrg opcode, s_coord, t_coord, rgba); 725b8e80941Smrg else 726b8e80941Smrg assert(0); 727b8e80941Smrg } 728b8e80941Smrg return; 729b8e80941Smrgfail_write_all_zero: 730b8e80941Smrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 731b8e80941Smrg for (c = 0; c < 4; c++) 
732b8e80941Smrg rgba[c][j] = 0; 733b8e80941Smrg } 734b8e80941Smrg return; 735b8e80941Smrg} 736b8e80941Smrg 737b8e80941Smrgstatic void 738b8e80941Smrgsp_tgsi_get_dims(const struct tgsi_image *image, 739b8e80941Smrg const struct tgsi_image_params *params, 740b8e80941Smrg int dims[4]) 741b8e80941Smrg{ 742b8e80941Smrg struct sp_tgsi_image *sp_img = (struct sp_tgsi_image *)image; 743b8e80941Smrg struct pipe_image_view *iview; 744b8e80941Smrg struct softpipe_resource *spr; 745b8e80941Smrg int level; 746b8e80941Smrg 747b8e80941Smrg if (params->unit >= PIPE_MAX_SHADER_IMAGES) 748b8e80941Smrg return; 749b8e80941Smrg iview = &sp_img->sp_iview[params->unit]; 750b8e80941Smrg spr = (struct softpipe_resource *)iview->resource; 751b8e80941Smrg if (!spr) 752b8e80941Smrg return; 753b8e80941Smrg 754b8e80941Smrg if (params->tgsi_tex_instr == TGSI_TEXTURE_BUFFER) { 755b8e80941Smrg dims[0] = iview->u.buf.size / util_format_get_blocksize(iview->format); 756b8e80941Smrg dims[1] = dims[2] = dims[3] = 0; 757b8e80941Smrg return; 758b8e80941Smrg } 759b8e80941Smrg 760b8e80941Smrg level = iview->u.tex.level; 761b8e80941Smrg dims[0] = u_minify(spr->base.width0, level); 762b8e80941Smrg switch (params->tgsi_tex_instr) { 763b8e80941Smrg case TGSI_TEXTURE_1D_ARRAY: 764b8e80941Smrg dims[1] = iview->u.tex.last_layer - iview->u.tex.first_layer + 1; 765b8e80941Smrg /* fallthrough */ 766b8e80941Smrg case TGSI_TEXTURE_1D: 767b8e80941Smrg return; 768b8e80941Smrg case TGSI_TEXTURE_2D_ARRAY: 769b8e80941Smrg dims[2] = iview->u.tex.last_layer - iview->u.tex.first_layer + 1; 770b8e80941Smrg /* fallthrough */ 771b8e80941Smrg case TGSI_TEXTURE_2D: 772b8e80941Smrg case TGSI_TEXTURE_CUBE: 773b8e80941Smrg case TGSI_TEXTURE_RECT: 774b8e80941Smrg dims[1] = u_minify(spr->base.height0, level); 775b8e80941Smrg return; 776b8e80941Smrg case TGSI_TEXTURE_3D: 777b8e80941Smrg dims[1] = u_minify(spr->base.height0, level); 778b8e80941Smrg dims[2] = u_minify(spr->base.depth0, level); 779b8e80941Smrg return; 780b8e80941Smrg case 
TGSI_TEXTURE_CUBE_ARRAY: 781b8e80941Smrg dims[1] = u_minify(spr->base.height0, level); 782b8e80941Smrg dims[2] = (iview->u.tex.last_layer - iview->u.tex.first_layer + 1) / 6; 783b8e80941Smrg break; 784b8e80941Smrg default: 785b8e80941Smrg assert(!"unexpected texture target in sp_get_dims()"); 786b8e80941Smrg return; 787b8e80941Smrg } 788b8e80941Smrg} 789b8e80941Smrg 790b8e80941Smrgstruct sp_tgsi_image * 791b8e80941Smrgsp_create_tgsi_image(void) 792b8e80941Smrg{ 793b8e80941Smrg struct sp_tgsi_image *img = CALLOC_STRUCT(sp_tgsi_image); 794b8e80941Smrg if (!img) 795b8e80941Smrg return NULL; 796b8e80941Smrg 797b8e80941Smrg img->base.load = sp_tgsi_load; 798b8e80941Smrg img->base.store = sp_tgsi_store; 799b8e80941Smrg img->base.op = sp_tgsi_op; 800b8e80941Smrg img->base.get_dims = sp_tgsi_get_dims; 801b8e80941Smrg return img; 802b8e80941Smrg}; 803