/*
 * Copyright 2016 Red Hat.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "sp_context.h" 25b8e80941Smrg#include "sp_buffer.h" 26b8e80941Smrg#include "sp_texture.h" 27b8e80941Smrg 28b8e80941Smrg#include "util/u_format.h" 29b8e80941Smrg 30b8e80941Smrgstatic bool 31b8e80941Smrgget_dimensions(const struct pipe_shader_buffer *bview, 32b8e80941Smrg const struct softpipe_resource *spr, 33b8e80941Smrg unsigned *width) 34b8e80941Smrg{ 35b8e80941Smrg *width = bview->buffer_size; 36b8e80941Smrg /* 37b8e80941Smrg * Bounds check the buffer size from the view 38b8e80941Smrg * and the buffer size from the underlying buffer. 39b8e80941Smrg */ 40b8e80941Smrg if (*width > spr->base.width0) 41b8e80941Smrg return false; 42b8e80941Smrg return true; 43b8e80941Smrg} 44b8e80941Smrg 45b8e80941Smrg/* 46b8e80941Smrg * Implement the image LOAD operation. 47b8e80941Smrg */ 48b8e80941Smrgstatic void 49b8e80941Smrgsp_tgsi_load(const struct tgsi_buffer *buffer, 50b8e80941Smrg const struct tgsi_buffer_params *params, 51b8e80941Smrg const int s[TGSI_QUAD_SIZE], 52b8e80941Smrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) 53b8e80941Smrg{ 54b8e80941Smrg struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer; 55b8e80941Smrg struct pipe_shader_buffer *bview; 56b8e80941Smrg struct softpipe_resource *spr; 57b8e80941Smrg unsigned width; 58b8e80941Smrg int c, j; 59b8e80941Smrg unsigned char *data_ptr; 60b8e80941Smrg const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT); 61b8e80941Smrg 62b8e80941Smrg if (params->unit >= PIPE_MAX_SHADER_BUFFERS) 63b8e80941Smrg goto fail_write_all_zero; 64b8e80941Smrg 65b8e80941Smrg bview = &sp_buf->sp_bview[params->unit]; 66b8e80941Smrg spr = softpipe_resource(bview->buffer); 67b8e80941Smrg if (!spr) 68b8e80941Smrg goto fail_write_all_zero; 69b8e80941Smrg 70b8e80941Smrg if (!get_dimensions(bview, spr, &width)) 71b8e80941Smrg return; 72b8e80941Smrg 73b8e80941Smrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 74b8e80941Smrg int s_coord; 
75b8e80941Smrg bool fill_zero = false; 76b8e80941Smrg uint32_t sdata[4]; 77b8e80941Smrg 78b8e80941Smrg if (!(params->execmask & (1 << j))) 79b8e80941Smrg fill_zero = true; 80b8e80941Smrg 81b8e80941Smrg s_coord = s[j]; 82b8e80941Smrg if (s_coord >= width) 83b8e80941Smrg fill_zero = true; 84b8e80941Smrg 85b8e80941Smrg if (fill_zero) { 86b8e80941Smrg for (c = 0; c < 4; c++) 87b8e80941Smrg rgba[c][j] = 0; 88b8e80941Smrg continue; 89b8e80941Smrg } 90b8e80941Smrg data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord; 91b8e80941Smrg for (c = 0; c < 4; c++) { 92b8e80941Smrg format_desc->fetch_rgba_uint(sdata, data_ptr, 0, 0); 93b8e80941Smrg ((uint32_t *)rgba[c])[j] = sdata[0]; 94b8e80941Smrg data_ptr += 4; 95b8e80941Smrg } 96b8e80941Smrg } 97b8e80941Smrg return; 98b8e80941Smrgfail_write_all_zero: 99b8e80941Smrg memset(rgba, 0, TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE * 4); 100b8e80941Smrg return; 101b8e80941Smrg} 102b8e80941Smrg 103b8e80941Smrg/* 104b8e80941Smrg * Implement the buffer STORE operation. 
105b8e80941Smrg */ 106b8e80941Smrgstatic void 107b8e80941Smrgsp_tgsi_store(const struct tgsi_buffer *buffer, 108b8e80941Smrg const struct tgsi_buffer_params *params, 109b8e80941Smrg const int s[TGSI_QUAD_SIZE], 110b8e80941Smrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) 111b8e80941Smrg{ 112b8e80941Smrg struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer; 113b8e80941Smrg struct pipe_shader_buffer *bview; 114b8e80941Smrg struct softpipe_resource *spr; 115b8e80941Smrg unsigned width; 116b8e80941Smrg unsigned char *data_ptr; 117b8e80941Smrg int j, c; 118b8e80941Smrg const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT); 119b8e80941Smrg 120b8e80941Smrg if (params->unit >= PIPE_MAX_SHADER_BUFFERS) 121b8e80941Smrg return; 122b8e80941Smrg 123b8e80941Smrg bview = &sp_buf->sp_bview[params->unit]; 124b8e80941Smrg spr = softpipe_resource(bview->buffer); 125b8e80941Smrg if (!spr) 126b8e80941Smrg return; 127b8e80941Smrg 128b8e80941Smrg if (!get_dimensions(bview, spr, &width)) 129b8e80941Smrg return; 130b8e80941Smrg 131b8e80941Smrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 132b8e80941Smrg int s_coord; 133b8e80941Smrg 134b8e80941Smrg if (!(params->execmask & (1 << j))) 135b8e80941Smrg continue; 136b8e80941Smrg 137b8e80941Smrg s_coord = s[j]; 138b8e80941Smrg if (s_coord >= width) 139b8e80941Smrg continue; 140b8e80941Smrg 141b8e80941Smrg data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord; 142b8e80941Smrg 143b8e80941Smrg for (c = 0; c < 4; c++) { 144b8e80941Smrg if (params->writemask & (1 << c)) { 145b8e80941Smrg unsigned temp[4]; 146b8e80941Smrg unsigned char *dptr = data_ptr + (c * 4); 147b8e80941Smrg temp[0] = ((uint32_t *)rgba[c])[j]; 148b8e80941Smrg format_desc->pack_rgba_uint(dptr, 0, temp, 0, 1, 1); 149b8e80941Smrg } 150b8e80941Smrg } 151b8e80941Smrg } 152b8e80941Smrg} 153b8e80941Smrg 154b8e80941Smrg/* 155b8e80941Smrg * Implement atomic operations on unsigned integers. 
156b8e80941Smrg */ 157b8e80941Smrgstatic void 158b8e80941Smrghandle_op_atomic(const struct pipe_shader_buffer *bview, 159b8e80941Smrg bool just_read, 160b8e80941Smrg unsigned char *data_ptr, 161b8e80941Smrg uint qi, 162b8e80941Smrg enum tgsi_opcode opcode, 163b8e80941Smrg unsigned writemask, 164b8e80941Smrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], 165b8e80941Smrg float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) 166b8e80941Smrg{ 167b8e80941Smrg uint c; 168b8e80941Smrg const struct util_format_description *format_desc = util_format_description(PIPE_FORMAT_R32_UINT); 169b8e80941Smrg unsigned sdata[4]; 170b8e80941Smrg 171b8e80941Smrg for (c = 0; c < 4; c++) { 172b8e80941Smrg unsigned temp[4]; 173b8e80941Smrg unsigned char *dptr = data_ptr + (c * 4); 174b8e80941Smrg format_desc->fetch_rgba_uint(temp, dptr, 0, 0); 175b8e80941Smrg sdata[c] = temp[0]; 176b8e80941Smrg } 177b8e80941Smrg 178b8e80941Smrg if (just_read) { 179b8e80941Smrg for (c = 0; c < 4; c++) { 180b8e80941Smrg ((uint32_t *)rgba[c])[qi] = sdata[c]; 181b8e80941Smrg } 182b8e80941Smrg return; 183b8e80941Smrg } 184b8e80941Smrg 185b8e80941Smrg switch (opcode) { 186b8e80941Smrg case TGSI_OPCODE_ATOMUADD: 187b8e80941Smrg for (c = 0; c < 4; c++) { 188b8e80941Smrg unsigned temp = sdata[c]; 189b8e80941Smrg sdata[c] += ((uint32_t *)rgba[c])[qi]; 190b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 191b8e80941Smrg } 192b8e80941Smrg break; 193b8e80941Smrg case TGSI_OPCODE_ATOMXCHG: 194b8e80941Smrg for (c = 0; c < 4; c++) { 195b8e80941Smrg unsigned temp = sdata[c]; 196b8e80941Smrg sdata[c] = ((uint32_t *)rgba[c])[qi]; 197b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 198b8e80941Smrg } 199b8e80941Smrg break; 200b8e80941Smrg case TGSI_OPCODE_ATOMCAS: 201b8e80941Smrg for (c = 0; c < 4; c++) { 202b8e80941Smrg unsigned dst_x = sdata[c]; 203b8e80941Smrg unsigned cmp_x = ((uint32_t *)rgba[c])[qi]; 204b8e80941Smrg unsigned src_x = ((uint32_t *)rgba2[c])[qi]; 205b8e80941Smrg unsigned temp = sdata[c]; 206b8e80941Smrg sdata[c] = 
(dst_x == cmp_x) ? src_x : dst_x; 207b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 208b8e80941Smrg } 209b8e80941Smrg break; 210b8e80941Smrg case TGSI_OPCODE_ATOMAND: 211b8e80941Smrg for (c = 0; c < 4; c++) { 212b8e80941Smrg unsigned temp = sdata[c]; 213b8e80941Smrg sdata[c] &= ((uint32_t *)rgba[c])[qi]; 214b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 215b8e80941Smrg } 216b8e80941Smrg break; 217b8e80941Smrg case TGSI_OPCODE_ATOMOR: 218b8e80941Smrg for (c = 0; c < 4; c++) { 219b8e80941Smrg unsigned temp = sdata[c]; 220b8e80941Smrg sdata[c] |= ((uint32_t *)rgba[c])[qi]; 221b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 222b8e80941Smrg } 223b8e80941Smrg break; 224b8e80941Smrg case TGSI_OPCODE_ATOMXOR: 225b8e80941Smrg for (c = 0; c < 4; c++) { 226b8e80941Smrg unsigned temp = sdata[c]; 227b8e80941Smrg sdata[c] ^= ((uint32_t *)rgba[c])[qi]; 228b8e80941Smrg ((uint32_t *)rgba[c])[qi] = temp; 229b8e80941Smrg } 230b8e80941Smrg break; 231b8e80941Smrg case TGSI_OPCODE_ATOMUMIN: 232b8e80941Smrg for (c = 0; c < 4; c++) { 233b8e80941Smrg unsigned dst_x = sdata[c]; 234b8e80941Smrg unsigned src_x = ((uint32_t *)rgba[c])[qi]; 235b8e80941Smrg sdata[c] = MIN2(dst_x, src_x); 236b8e80941Smrg ((uint32_t *)rgba[c])[qi] = dst_x; 237b8e80941Smrg } 238b8e80941Smrg break; 239b8e80941Smrg case TGSI_OPCODE_ATOMUMAX: 240b8e80941Smrg for (c = 0; c < 4; c++) { 241b8e80941Smrg unsigned dst_x = sdata[c]; 242b8e80941Smrg unsigned src_x = ((uint32_t *)rgba[c])[qi]; 243b8e80941Smrg sdata[c] = MAX2(dst_x, src_x); 244b8e80941Smrg ((uint32_t *)rgba[c])[qi] = dst_x; 245b8e80941Smrg } 246b8e80941Smrg break; 247b8e80941Smrg case TGSI_OPCODE_ATOMIMIN: 248b8e80941Smrg for (c = 0; c < 4; c++) { 249b8e80941Smrg int dst_x = sdata[c]; 250b8e80941Smrg int src_x = ((uint32_t *)rgba[c])[qi]; 251b8e80941Smrg sdata[c] = MIN2(dst_x, src_x); 252b8e80941Smrg ((uint32_t *)rgba[c])[qi] = dst_x; 253b8e80941Smrg } 254b8e80941Smrg break; 255b8e80941Smrg case TGSI_OPCODE_ATOMIMAX: 256b8e80941Smrg for (c = 0; c < 4; c++) { 
257b8e80941Smrg int dst_x = sdata[c]; 258b8e80941Smrg int src_x = ((uint32_t *)rgba[c])[qi]; 259b8e80941Smrg sdata[c] = MAX2(dst_x, src_x); 260b8e80941Smrg ((uint32_t *)rgba[c])[qi] = dst_x; 261b8e80941Smrg } 262b8e80941Smrg break; 263b8e80941Smrg case TGSI_OPCODE_ATOMFADD: 264b8e80941Smrg for (c = 0; c < 4; c++) { 265b8e80941Smrg float temp = uif(sdata[c]); 266b8e80941Smrg sdata[c] = fui(temp + rgba[c][qi]); 267b8e80941Smrg rgba[c][qi] = temp; 268b8e80941Smrg } 269b8e80941Smrg break; 270b8e80941Smrg default: 271b8e80941Smrg assert(!"Unexpected TGSI opcode in sp_tgsi_op"); 272b8e80941Smrg break; 273b8e80941Smrg } 274b8e80941Smrg 275b8e80941Smrg for (c = 0; c < 4; c++) { 276b8e80941Smrg if (writemask & (1 << c)) { 277b8e80941Smrg unsigned temp[4]; 278b8e80941Smrg unsigned char *dptr = data_ptr + (c * 4); 279b8e80941Smrg temp[0] = sdata[c]; 280b8e80941Smrg format_desc->pack_rgba_uint(dptr, 0, temp, 0, 1, 1); 281b8e80941Smrg } 282b8e80941Smrg } 283b8e80941Smrg} 284b8e80941Smrg 285b8e80941Smrg/* 286b8e80941Smrg * Implement atomic buffer operations. 
287b8e80941Smrg */ 288b8e80941Smrgstatic void 289b8e80941Smrgsp_tgsi_op(const struct tgsi_buffer *buffer, 290b8e80941Smrg const struct tgsi_buffer_params *params, 291b8e80941Smrg enum tgsi_opcode opcode, 292b8e80941Smrg const int s[TGSI_QUAD_SIZE], 293b8e80941Smrg float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], 294b8e80941Smrg float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) 295b8e80941Smrg{ 296b8e80941Smrg struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer; 297b8e80941Smrg struct pipe_shader_buffer *bview; 298b8e80941Smrg struct softpipe_resource *spr; 299b8e80941Smrg unsigned width; 300b8e80941Smrg int j, c; 301b8e80941Smrg unsigned char *data_ptr; 302b8e80941Smrg 303b8e80941Smrg if (params->unit >= PIPE_MAX_SHADER_BUFFERS) 304b8e80941Smrg return; 305b8e80941Smrg 306b8e80941Smrg bview = &sp_buf->sp_bview[params->unit]; 307b8e80941Smrg spr = softpipe_resource(bview->buffer); 308b8e80941Smrg if (!spr) 309b8e80941Smrg goto fail_write_all_zero; 310b8e80941Smrg 311b8e80941Smrg if (!get_dimensions(bview, spr, &width)) 312b8e80941Smrg goto fail_write_all_zero; 313b8e80941Smrg 314b8e80941Smrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 315b8e80941Smrg int s_coord; 316b8e80941Smrg bool just_read = false; 317b8e80941Smrg 318b8e80941Smrg s_coord = s[j]; 319b8e80941Smrg if (s_coord >= width) { 320b8e80941Smrg for (c = 0; c < 4; c++) { 321b8e80941Smrg rgba[c][j] = 0; 322b8e80941Smrg } 323b8e80941Smrg continue; 324b8e80941Smrg } 325b8e80941Smrg 326b8e80941Smrg /* just readback the value for atomic if execmask isn't set */ 327b8e80941Smrg if (!(params->execmask & (1 << j))) { 328b8e80941Smrg just_read = true; 329b8e80941Smrg } 330b8e80941Smrg 331b8e80941Smrg data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord; 332b8e80941Smrg /* we should see atomic operations on r32 formats */ 333b8e80941Smrg 334b8e80941Smrg handle_op_atomic(bview, just_read, data_ptr, j, 335b8e80941Smrg opcode, params->writemask, rgba, rgba2); 336b8e80941Smrg } 337b8e80941Smrg return; 
338b8e80941Smrgfail_write_all_zero: 339b8e80941Smrg memset(rgba, 0, TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE * 4); 340b8e80941Smrg return; 341b8e80941Smrg} 342b8e80941Smrg 343b8e80941Smrg/* 344b8e80941Smrg * return size of the attached buffer for RESQ opcode. 345b8e80941Smrg */ 346b8e80941Smrgstatic void 347b8e80941Smrgsp_tgsi_get_dims(const struct tgsi_buffer *buffer, 348b8e80941Smrg const struct tgsi_buffer_params *params, 349b8e80941Smrg int *dim) 350b8e80941Smrg{ 351b8e80941Smrg struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer; 352b8e80941Smrg struct pipe_shader_buffer *bview; 353b8e80941Smrg struct softpipe_resource *spr; 354b8e80941Smrg 355b8e80941Smrg if (params->unit >= PIPE_MAX_SHADER_BUFFERS) 356b8e80941Smrg return; 357b8e80941Smrg 358b8e80941Smrg bview = &sp_buf->sp_bview[params->unit]; 359b8e80941Smrg spr = softpipe_resource(bview->buffer); 360b8e80941Smrg if (!spr) 361b8e80941Smrg return; 362b8e80941Smrg 363b8e80941Smrg *dim = bview->buffer_size; 364b8e80941Smrg} 365b8e80941Smrg 366b8e80941Smrgstruct sp_tgsi_buffer * 367b8e80941Smrgsp_create_tgsi_buffer(void) 368b8e80941Smrg{ 369b8e80941Smrg struct sp_tgsi_buffer *buf = CALLOC_STRUCT(sp_tgsi_buffer); 370b8e80941Smrg if (!buf) 371b8e80941Smrg return NULL; 372b8e80941Smrg 373b8e80941Smrg buf->base.load = sp_tgsi_load; 374b8e80941Smrg buf->base.store = sp_tgsi_store; 375b8e80941Smrg buf->base.op = sp_tgsi_op; 376b8e80941Smrg buf->base.get_dims = sp_tgsi_get_dims; 377b8e80941Smrg return buf; 378b8e80941Smrg}; 379