101e04c3fSmrg/*
201e04c3fSmrg * Copyright 2016 Red Hat.
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * on the rights to use, copy, modify, merge, publish, distribute, sub
801e04c3fSmrg * license, and/or sell copies of the Software, and to permit persons to whom
901e04c3fSmrg * the Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
1901e04c3fSmrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
2001e04c3fSmrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
2101e04c3fSmrg * USE OR OTHER DEALINGS IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg#include "util/u_inlines.h"
2401e04c3fSmrg#include "util/u_math.h"
2501e04c3fSmrg#include "util/u_memory.h"
2601e04c3fSmrg#include "util/u_pstipple.h"
2701e04c3fSmrg#include "pipe/p_shader_tokens.h"
2801e04c3fSmrg#include "draw/draw_context.h"
2901e04c3fSmrg#include "draw/draw_vertex.h"
3001e04c3fSmrg#include "sp_context.h"
3101e04c3fSmrg#include "sp_screen.h"
3201e04c3fSmrg#include "sp_state.h"
3301e04c3fSmrg#include "sp_texture.h"
3401e04c3fSmrg#include "sp_tex_sample.h"
3501e04c3fSmrg#include "sp_tex_tile_cache.h"
3601e04c3fSmrg#include "tgsi/tgsi_parse.h"
3701e04c3fSmrg
3801e04c3fSmrgstatic void
3901e04c3fSmrgcs_prepare(const struct sp_compute_shader *cs,
4001e04c3fSmrg           struct tgsi_exec_machine *machine,
4101e04c3fSmrg           int w, int h, int d,
4201e04c3fSmrg           int g_w, int g_h, int g_d,
4301e04c3fSmrg           int b_w, int b_h, int b_d,
4401e04c3fSmrg           struct tgsi_sampler *sampler,
4501e04c3fSmrg           struct tgsi_image *image,
4601e04c3fSmrg           struct tgsi_buffer *buffer )
4701e04c3fSmrg{
4801e04c3fSmrg   int j;
4901e04c3fSmrg   /*
5001e04c3fSmrg    * Bind tokens/shader to the interpreter's machine state.
5101e04c3fSmrg    */
5201e04c3fSmrg   tgsi_exec_machine_bind_shader(machine,
5301e04c3fSmrg                                 cs->tokens,
5401e04c3fSmrg                                 sampler, image, buffer);
5501e04c3fSmrg
5601e04c3fSmrg   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
5701e04c3fSmrg      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];
5801e04c3fSmrg      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
5901e04c3fSmrg         machine->SystemValue[i].xyzw[0].i[j] = w;
6001e04c3fSmrg         machine->SystemValue[i].xyzw[1].i[j] = h;
6101e04c3fSmrg         machine->SystemValue[i].xyzw[2].i[j] = d;
6201e04c3fSmrg      }
6301e04c3fSmrg   }
6401e04c3fSmrg
6501e04c3fSmrg   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
6601e04c3fSmrg      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];
6701e04c3fSmrg      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
6801e04c3fSmrg         machine->SystemValue[i].xyzw[0].i[j] = g_w;
6901e04c3fSmrg         machine->SystemValue[i].xyzw[1].i[j] = g_h;
7001e04c3fSmrg         machine->SystemValue[i].xyzw[2].i[j] = g_d;
7101e04c3fSmrg      }
7201e04c3fSmrg   }
7301e04c3fSmrg
7401e04c3fSmrg   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
7501e04c3fSmrg      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];
7601e04c3fSmrg      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
7701e04c3fSmrg         machine->SystemValue[i].xyzw[0].i[j] = b_w;
7801e04c3fSmrg         machine->SystemValue[i].xyzw[1].i[j] = b_h;
7901e04c3fSmrg         machine->SystemValue[i].xyzw[2].i[j] = b_d;
8001e04c3fSmrg      }
8101e04c3fSmrg   }
8201e04c3fSmrg}
8301e04c3fSmrg
8401e04c3fSmrgstatic bool
8501e04c3fSmrgcs_run(const struct sp_compute_shader *cs,
8601e04c3fSmrg       int g_w, int g_h, int g_d,
8701e04c3fSmrg       struct tgsi_exec_machine *machine, bool restart)
8801e04c3fSmrg{
8901e04c3fSmrg   if (!restart) {
9001e04c3fSmrg      if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
9101e04c3fSmrg         unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
9201e04c3fSmrg         int j;
9301e04c3fSmrg         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
9401e04c3fSmrg            machine->SystemValue[i].xyzw[0].i[j] = g_w;
9501e04c3fSmrg            machine->SystemValue[i].xyzw[1].i[j] = g_h;
9601e04c3fSmrg            machine->SystemValue[i].xyzw[2].i[j] = g_d;
9701e04c3fSmrg         }
9801e04c3fSmrg      }
9901e04c3fSmrg      machine->NonHelperMask = (1 << 1) - 1;
10001e04c3fSmrg   }
10101e04c3fSmrg
10201e04c3fSmrg   tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
10301e04c3fSmrg
10401e04c3fSmrg   if (machine->pc != -1)
10501e04c3fSmrg      return true;
10601e04c3fSmrg   return false;
10701e04c3fSmrg}
10801e04c3fSmrg
/**
 * Execute every thread of one workgroup to completion.
 *
 * \param cs           compute shader being dispatched
 * \param g_w,g_h,g_d  grid coordinates of this workgroup
 * \param num_threads  number of entries in \p machines
 * \param machines     one interpreter machine per thread in the group
 *
 * All threads are run once from the start.  If any cs_run() call reports
 * true, every thread is run again in restart mode, and this repeats until a
 * full pass reports false for all threads.
 */
static void
run_workgroup(const struct sp_compute_shader *cs,
              int g_w, int g_h, int g_d, int num_threads,
              struct tgsi_exec_machine **machines)
{
   bool resume = false;
   int t;

   for (;;) {
      bool any_pending = false;

      /* Every thread must be stepped each pass, so no early exit here. */
      for (t = 0; t < num_threads; t++) {
         if (cs_run(cs, g_w, g_h, g_d, machines[t], resume))
            any_pending = true;
      }

      if (!any_pending)
         break;

      resume = true;
   }
}
12901e04c3fSmrg
13001e04c3fSmrgstatic void
13101e04c3fSmrgcs_delete(const struct sp_compute_shader *cs,
13201e04c3fSmrg          struct tgsi_exec_machine *machine)
13301e04c3fSmrg{
13401e04c3fSmrg   if (machine->Tokens == cs->tokens) {
13501e04c3fSmrg      tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
13601e04c3fSmrg   }
13701e04c3fSmrg}
13801e04c3fSmrg
13901e04c3fSmrgstatic void
14001e04c3fSmrgfill_grid_size(struct pipe_context *context,
14101e04c3fSmrg               const struct pipe_grid_info *info,
14201e04c3fSmrg               uint32_t grid_size[3])
14301e04c3fSmrg{
14401e04c3fSmrg   struct pipe_transfer *transfer;
14501e04c3fSmrg   uint32_t *params;
14601e04c3fSmrg   if (!info->indirect) {
14701e04c3fSmrg      grid_size[0] = info->grid[0];
14801e04c3fSmrg      grid_size[1] = info->grid[1];
14901e04c3fSmrg      grid_size[2] = info->grid[2];
15001e04c3fSmrg      return;
15101e04c3fSmrg   }
15201e04c3fSmrg   params = pipe_buffer_map_range(context, info->indirect,
15301e04c3fSmrg                                  info->indirect_offset,
15401e04c3fSmrg                                  3 * sizeof(uint32_t),
1557ec681f3Smrg                                  PIPE_MAP_READ,
15601e04c3fSmrg                                  &transfer);
15701e04c3fSmrg
15801e04c3fSmrg   if (!transfer)
15901e04c3fSmrg      return;
16001e04c3fSmrg
16101e04c3fSmrg   grid_size[0] = params[0];
16201e04c3fSmrg   grid_size[1] = params[1];
16301e04c3fSmrg   grid_size[2] = params[2];
16401e04c3fSmrg   pipe_buffer_unmap(context, transfer);
16501e04c3fSmrg}
16601e04c3fSmrg
16701e04c3fSmrgvoid
16801e04c3fSmrgsoftpipe_launch_grid(struct pipe_context *context,
16901e04c3fSmrg                     const struct pipe_grid_info *info)
17001e04c3fSmrg{
17101e04c3fSmrg   struct softpipe_context *softpipe = softpipe_context(context);
17201e04c3fSmrg   struct sp_compute_shader *cs = softpipe->cs;
17301e04c3fSmrg   int num_threads_in_group;
17401e04c3fSmrg   struct tgsi_exec_machine **machines;
17501e04c3fSmrg   int bwidth, bheight, bdepth;
17601e04c3fSmrg   int w, h, d, i;
17701e04c3fSmrg   int g_w, g_h, g_d;
17801e04c3fSmrg   uint32_t grid_size[3] = {0};
17901e04c3fSmrg   void *local_mem = NULL;
18001e04c3fSmrg
18101e04c3fSmrg   softpipe_update_compute_samplers(softpipe);
18201e04c3fSmrg   bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
18301e04c3fSmrg   bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
18401e04c3fSmrg   bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
18501e04c3fSmrg   num_threads_in_group = bwidth * bheight * bdepth;
18601e04c3fSmrg
18701e04c3fSmrg   fill_grid_size(context, info, grid_size);
18801e04c3fSmrg
18901e04c3fSmrg   if (cs->shader.req_local_mem) {
19001e04c3fSmrg      local_mem = CALLOC(1, cs->shader.req_local_mem);
19101e04c3fSmrg   }
19201e04c3fSmrg
19301e04c3fSmrg   machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);
19401e04c3fSmrg   if (!machines) {
19501e04c3fSmrg      FREE(local_mem);
19601e04c3fSmrg      return;
19701e04c3fSmrg   }
19801e04c3fSmrg
19901e04c3fSmrg   /* initialise machines + GRID_SIZE + THREAD_ID  + BLOCK_SIZE */
20001e04c3fSmrg   for (d = 0; d < bdepth; d++) {
20101e04c3fSmrg      for (h = 0; h < bheight; h++) {
20201e04c3fSmrg         for (w = 0; w < bwidth; w++) {
20301e04c3fSmrg            int idx = w + (h * bwidth) + (d * bheight * bwidth);
20401e04c3fSmrg            machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
20501e04c3fSmrg
20601e04c3fSmrg            machines[idx]->LocalMem = local_mem;
20701e04c3fSmrg            machines[idx]->LocalMemSize = cs->shader.req_local_mem;
20801e04c3fSmrg            cs_prepare(cs, machines[idx],
20901e04c3fSmrg                       w, h, d,
21001e04c3fSmrg                       grid_size[0], grid_size[1], grid_size[2],
21101e04c3fSmrg                       bwidth, bheight, bdepth,
21201e04c3fSmrg                       (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
21301e04c3fSmrg                       (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
21401e04c3fSmrg                       (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
21501e04c3fSmrg            tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
21601e04c3fSmrg                                           softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
21701e04c3fSmrg                                           softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
21801e04c3fSmrg         }
21901e04c3fSmrg      }
22001e04c3fSmrg   }
22101e04c3fSmrg
22201e04c3fSmrg   for (g_d = 0; g_d < grid_size[2]; g_d++) {
22301e04c3fSmrg      for (g_h = 0; g_h < grid_size[1]; g_h++) {
22401e04c3fSmrg         for (g_w = 0; g_w < grid_size[0]; g_w++) {
22501e04c3fSmrg            run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
22601e04c3fSmrg         }
22701e04c3fSmrg      }
22801e04c3fSmrg   }
22901e04c3fSmrg
2307ec681f3Smrg   if (softpipe->active_statistics_queries) {
2317ec681f3Smrg      softpipe->pipeline_statistics.cs_invocations +=
2327ec681f3Smrg          grid_size[0] * grid_size[1] * grid_size[2];
2337ec681f3Smrg   }
2347ec681f3Smrg
23501e04c3fSmrg   for (i = 0; i < num_threads_in_group; i++) {
23601e04c3fSmrg      cs_delete(cs, machines[i]);
23701e04c3fSmrg      tgsi_exec_machine_destroy(machines[i]);
23801e04c3fSmrg   }
23901e04c3fSmrg
24001e04c3fSmrg   FREE(local_mem);
24101e04c3fSmrg   FREE(machines);
24201e04c3fSmrg}
243