101e04c3fSmrg/* 201e04c3fSmrg * Copyright 2016 Red Hat. 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * on the rights to use, copy, modify, merge, publish, distribute, sub 801e04c3fSmrg * license, and/or sell copies of the Software, and to permit persons to whom 901e04c3fSmrg * the Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 1901e04c3fSmrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 2001e04c3fSmrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 2101e04c3fSmrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg#include "util/u_inlines.h" 2401e04c3fSmrg#include "util/u_math.h" 2501e04c3fSmrg#include "util/u_memory.h" 2601e04c3fSmrg#include "util/u_pstipple.h" 2701e04c3fSmrg#include "pipe/p_shader_tokens.h" 2801e04c3fSmrg#include "draw/draw_context.h" 2901e04c3fSmrg#include "draw/draw_vertex.h" 3001e04c3fSmrg#include "sp_context.h" 3101e04c3fSmrg#include "sp_screen.h" 3201e04c3fSmrg#include "sp_state.h" 3301e04c3fSmrg#include "sp_texture.h" 3401e04c3fSmrg#include "sp_tex_sample.h" 3501e04c3fSmrg#include "sp_tex_tile_cache.h" 3601e04c3fSmrg#include "tgsi/tgsi_parse.h" 3701e04c3fSmrg 3801e04c3fSmrgstatic void 3901e04c3fSmrgcs_prepare(const struct sp_compute_shader *cs, 4001e04c3fSmrg struct tgsi_exec_machine *machine, 4101e04c3fSmrg int w, int h, int d, 4201e04c3fSmrg int g_w, int g_h, int g_d, 4301e04c3fSmrg int b_w, int b_h, int b_d, 4401e04c3fSmrg struct tgsi_sampler *sampler, 4501e04c3fSmrg struct tgsi_image *image, 4601e04c3fSmrg struct tgsi_buffer *buffer ) 4701e04c3fSmrg{ 4801e04c3fSmrg int j; 4901e04c3fSmrg /* 5001e04c3fSmrg * Bind tokens/shader to the interpreter's machine state. 5101e04c3fSmrg */ 5201e04c3fSmrg tgsi_exec_machine_bind_shader(machine, 5301e04c3fSmrg cs->tokens, 5401e04c3fSmrg sampler, image, buffer); 5501e04c3fSmrg 5601e04c3fSmrg if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) { 5701e04c3fSmrg unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID]; 5801e04c3fSmrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 5901e04c3fSmrg machine->SystemValue[i].xyzw[0].i[j] = w; 6001e04c3fSmrg machine->SystemValue[i].xyzw[1].i[j] = h; 6101e04c3fSmrg machine->SystemValue[i].xyzw[2].i[j] = d; 6201e04c3fSmrg } 6301e04c3fSmrg } 6401e04c3fSmrg 6501e04c3fSmrg if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) { 6601e04c3fSmrg unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE]; 6701e04c3fSmrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 6801e04c3fSmrg machine->SystemValue[i].xyzw[0].i[j] = g_w; 6901e04c3fSmrg machine->SystemValue[i].xyzw[1].i[j] = g_h; 7001e04c3fSmrg machine->SystemValue[i].xyzw[2].i[j] = g_d; 7101e04c3fSmrg } 7201e04c3fSmrg } 7301e04c3fSmrg 7401e04c3fSmrg if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) { 7501e04c3fSmrg unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE]; 7601e04c3fSmrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 7701e04c3fSmrg machine->SystemValue[i].xyzw[0].i[j] = b_w; 7801e04c3fSmrg machine->SystemValue[i].xyzw[1].i[j] = b_h; 7901e04c3fSmrg machine->SystemValue[i].xyzw[2].i[j] = b_d; 8001e04c3fSmrg } 8101e04c3fSmrg } 8201e04c3fSmrg} 8301e04c3fSmrg 8401e04c3fSmrgstatic bool 8501e04c3fSmrgcs_run(const struct sp_compute_shader *cs, 8601e04c3fSmrg int g_w, int g_h, int g_d, 8701e04c3fSmrg struct tgsi_exec_machine *machine, bool restart) 8801e04c3fSmrg{ 8901e04c3fSmrg if (!restart) { 9001e04c3fSmrg if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) { 9101e04c3fSmrg unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID]; 9201e04c3fSmrg int j; 9301e04c3fSmrg for (j = 0; j < TGSI_QUAD_SIZE; j++) { 9401e04c3fSmrg machine->SystemValue[i].xyzw[0].i[j] = g_w; 9501e04c3fSmrg machine->SystemValue[i].xyzw[1].i[j] = g_h; 9601e04c3fSmrg machine->SystemValue[i].xyzw[2].i[j] = g_d; 9701e04c3fSmrg } 9801e04c3fSmrg } 9901e04c3fSmrg machine->NonHelperMask = (1 << 1) - 1; 10001e04c3fSmrg } 10101e04c3fSmrg 10201e04c3fSmrg tgsi_exec_machine_run(machine, restart ? machine->pc : 0); 10301e04c3fSmrg 10401e04c3fSmrg if (machine->pc != -1) 10501e04c3fSmrg return true; 10601e04c3fSmrg return false; 10701e04c3fSmrg} 10801e04c3fSmrg 10901e04c3fSmrgstatic void 11001e04c3fSmrgrun_workgroup(const struct sp_compute_shader *cs, 11101e04c3fSmrg int g_w, int g_h, int g_d, int num_threads, 11201e04c3fSmrg struct tgsi_exec_machine **machines) 11301e04c3fSmrg{ 11401e04c3fSmrg int i; 11501e04c3fSmrg bool grp_hit_barrier, restart_threads = false; 11601e04c3fSmrg 11701e04c3fSmrg do { 11801e04c3fSmrg grp_hit_barrier = false; 11901e04c3fSmrg for (i = 0; i < num_threads; i++) { 12001e04c3fSmrg grp_hit_barrier |= cs_run(cs, g_w, g_h, g_d, machines[i], restart_threads); 12101e04c3fSmrg } 12201e04c3fSmrg restart_threads = false; 12301e04c3fSmrg if (grp_hit_barrier) { 12401e04c3fSmrg grp_hit_barrier = false; 12501e04c3fSmrg restart_threads = true; 12601e04c3fSmrg } 12701e04c3fSmrg } while (restart_threads); 12801e04c3fSmrg} 12901e04c3fSmrg 13001e04c3fSmrgstatic void 13101e04c3fSmrgcs_delete(const struct sp_compute_shader *cs, 13201e04c3fSmrg struct tgsi_exec_machine *machine) 13301e04c3fSmrg{ 13401e04c3fSmrg if (machine->Tokens == cs->tokens) { 13501e04c3fSmrg tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL); 13601e04c3fSmrg } 13701e04c3fSmrg} 13801e04c3fSmrg 13901e04c3fSmrgstatic void 14001e04c3fSmrgfill_grid_size(struct pipe_context *context, 14101e04c3fSmrg const struct pipe_grid_info *info, 14201e04c3fSmrg uint32_t grid_size[3]) 14301e04c3fSmrg{ 14401e04c3fSmrg struct pipe_transfer *transfer; 14501e04c3fSmrg uint32_t *params; 14601e04c3fSmrg if (!info->indirect) { 14701e04c3fSmrg grid_size[0] = info->grid[0]; 14801e04c3fSmrg grid_size[1] = info->grid[1]; 14901e04c3fSmrg grid_size[2] = info->grid[2]; 15001e04c3fSmrg return; 15101e04c3fSmrg } 15201e04c3fSmrg params = pipe_buffer_map_range(context, info->indirect, 15301e04c3fSmrg info->indirect_offset, 15401e04c3fSmrg 3 * sizeof(uint32_t), 1557ec681f3Smrg PIPE_MAP_READ, 15601e04c3fSmrg &transfer); 15701e04c3fSmrg 15801e04c3fSmrg if (!transfer) 15901e04c3fSmrg return; 16001e04c3fSmrg 16101e04c3fSmrg grid_size[0] = params[0]; 16201e04c3fSmrg grid_size[1] = params[1]; 16301e04c3fSmrg grid_size[2] = params[2]; 16401e04c3fSmrg pipe_buffer_unmap(context, transfer); 16501e04c3fSmrg} 16601e04c3fSmrg 16701e04c3fSmrgvoid 16801e04c3fSmrgsoftpipe_launch_grid(struct pipe_context *context, 16901e04c3fSmrg const struct pipe_grid_info *info) 17001e04c3fSmrg{ 17101e04c3fSmrg struct softpipe_context *softpipe = softpipe_context(context); 17201e04c3fSmrg struct sp_compute_shader *cs = softpipe->cs; 17301e04c3fSmrg int num_threads_in_group; 17401e04c3fSmrg struct tgsi_exec_machine **machines; 17501e04c3fSmrg int bwidth, bheight, bdepth; 17601e04c3fSmrg int w, h, d, i; 17701e04c3fSmrg int g_w, g_h, g_d; 17801e04c3fSmrg uint32_t grid_size[3] = {0}; 17901e04c3fSmrg void *local_mem = NULL; 18001e04c3fSmrg 18101e04c3fSmrg softpipe_update_compute_samplers(softpipe); 18201e04c3fSmrg bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH]; 18301e04c3fSmrg bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT]; 18401e04c3fSmrg bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]; 18501e04c3fSmrg num_threads_in_group = bwidth * bheight * bdepth; 18601e04c3fSmrg 18701e04c3fSmrg fill_grid_size(context, info, grid_size); 18801e04c3fSmrg 18901e04c3fSmrg if (cs->shader.req_local_mem) { 19001e04c3fSmrg local_mem = CALLOC(1, cs->shader.req_local_mem); 19101e04c3fSmrg } 19201e04c3fSmrg 19301e04c3fSmrg machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group); 19401e04c3fSmrg if (!machines) { 19501e04c3fSmrg FREE(local_mem); 19601e04c3fSmrg return; 19701e04c3fSmrg } 19801e04c3fSmrg 19901e04c3fSmrg /* initialise machines + GRID_SIZE + THREAD_ID + BLOCK_SIZE */ 20001e04c3fSmrg for (d = 0; d < bdepth; d++) { 20101e04c3fSmrg for (h = 0; h < bheight; h++) { 20201e04c3fSmrg for (w = 0; w < bwidth; w++) { 20301e04c3fSmrg int idx = w + (h * bwidth) + (d * bheight * bwidth); 20401e04c3fSmrg machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE); 20501e04c3fSmrg 20601e04c3fSmrg machines[idx]->LocalMem = local_mem; 20701e04c3fSmrg machines[idx]->LocalMemSize = cs->shader.req_local_mem; 20801e04c3fSmrg cs_prepare(cs, machines[idx], 20901e04c3fSmrg w, h, d, 21001e04c3fSmrg grid_size[0], grid_size[1], grid_size[2], 21101e04c3fSmrg bwidth, bheight, bdepth, 21201e04c3fSmrg (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE], 21301e04c3fSmrg (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE], 21401e04c3fSmrg (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]); 21501e04c3fSmrg tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS, 21601e04c3fSmrg softpipe->mapped_constants[PIPE_SHADER_COMPUTE], 21701e04c3fSmrg softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]); 21801e04c3fSmrg } 21901e04c3fSmrg } 22001e04c3fSmrg } 22101e04c3fSmrg 22201e04c3fSmrg for (g_d = 0; g_d < grid_size[2]; g_d++) { 22301e04c3fSmrg for (g_h = 0; g_h < grid_size[1]; g_h++) { 22401e04c3fSmrg for (g_w = 0; g_w < grid_size[0]; g_w++) { 22501e04c3fSmrg run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines); 22601e04c3fSmrg } 22701e04c3fSmrg } 22801e04c3fSmrg } 22901e04c3fSmrg 2307ec681f3Smrg if (softpipe->active_statistics_queries) { 2317ec681f3Smrg softpipe->pipeline_statistics.cs_invocations += 2327ec681f3Smrg grid_size[0] * grid_size[1] * grid_size[2]; 2337ec681f3Smrg } 2347ec681f3Smrg 23501e04c3fSmrg for (i = 0; i < num_threads_in_group; i++) { 23601e04c3fSmrg cs_delete(cs, machines[i]); 23701e04c3fSmrg tgsi_exec_machine_destroy(machines[i]); 23801e04c3fSmrg } 23901e04c3fSmrg 24001e04c3fSmrg FREE(local_mem); 24101e04c3fSmrg FREE(machines); 24201e04c3fSmrg} 243