1/************************************************************************** 2 * 3 * Copyright 2019 Red Hat. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **************************************************************************/ 25#include "util/u_memory.h" 26#include "util/simple_list.h" 27#include "util/os_time.h" 28#include "util/u_dump.h" 29#include "util/u_string.h" 30#include "tgsi/tgsi_dump.h" 31#include "tgsi/tgsi_parse.h" 32#include "gallivm/lp_bld_const.h" 33#include "gallivm/lp_bld_debug.h" 34#include "gallivm/lp_bld_intr.h" 35#include "gallivm/lp_bld_flow.h" 36#include "gallivm/lp_bld_gather.h" 37#include "gallivm/lp_bld_coro.h" 38#include "gallivm/lp_bld_nir.h" 39#include "lp_state_cs.h" 40#include "lp_context.h" 41#include "lp_debug.h" 42#include "lp_state.h" 43#include "lp_perf.h" 44#include "lp_screen.h" 45#include "lp_memory.h" 46#include "lp_query.h" 47#include "lp_cs_tpool.h" 48#include "frontend/sw_winsys.h" 49#include "nir/nir_to_tgsi_info.h" 50#include "util/mesa-sha1.h" 51#include "nir_serialize.h" 52 53/** Fragment shader number (for debugging) */ 54static unsigned cs_no = 0; 55 56struct lp_cs_job_info { 57 unsigned grid_size[3]; 58 unsigned grid_base[3]; 59 unsigned block_size[3]; 60 unsigned req_local_mem; 61 unsigned work_dim; 62 struct lp_cs_exec *current; 63}; 64 65static void 66generate_compute(struct llvmpipe_context *lp, 67 struct lp_compute_shader *shader, 68 struct lp_compute_shader_variant *variant) 69{ 70 struct gallivm_state *gallivm = variant->gallivm; 71 const struct lp_compute_shader_variant_key *key = &variant->key; 72 char func_name[64], func_name_coro[64]; 73 LLVMTypeRef arg_types[19]; 74 LLVMTypeRef func_type, coro_func_type; 75 LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); 76 LLVMValueRef context_ptr; 77 LLVMValueRef x_size_arg, y_size_arg, z_size_arg; 78 LLVMValueRef grid_x_arg, grid_y_arg, grid_z_arg; 79 LLVMValueRef grid_size_x_arg, grid_size_y_arg, grid_size_z_arg; 80 LLVMValueRef work_dim_arg, thread_data_ptr; 81 LLVMBasicBlockRef block; 82 LLVMBuilderRef builder; 83 struct lp_build_sampler_soa *sampler; 84 struct lp_build_image_soa *image; 85 LLVMValueRef function, coro; 86 struct lp_type cs_type; 87 unsigned i; 88 89 /* 90 * This function has two parts 91 * a) setup the coroutine execution environment loop. 92 * b) build the compute shader llvm for use inside the coroutine. 93 */ 94 assert(lp_native_vector_width / 32 >= 4); 95 96 memset(&cs_type, 0, sizeof cs_type); 97 cs_type.floating = TRUE; /* floating point values */ 98 cs_type.sign = TRUE; /* values are signed */ 99 cs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ 100 cs_type.width = 32; /* 32-bit float */ 101 cs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */ 102 snprintf(func_name, sizeof(func_name), "cs_variant"); 103 104 snprintf(func_name_coro, sizeof(func_name), "cs_co_variant"); 105 106 arg_types[0] = variant->jit_cs_context_ptr_type; /* context */ 107 arg_types[1] = int32_type; /* block_x_size */ 108 arg_types[2] = int32_type; /* block_y_size */ 109 arg_types[3] = int32_type; /* block_z_size */ 110 arg_types[4] = int32_type; /* grid_x */ 111 arg_types[5] = int32_type; /* grid_y */ 112 arg_types[6] = int32_type; /* grid_z */ 113 arg_types[7] = int32_type; /* grid_size_x */ 114 arg_types[8] = int32_type; /* grid_size_y */ 115 arg_types[9] = int32_type; /* grid_size_z */ 116 arg_types[10] = int32_type; /* work dim */ 117 arg_types[11] = variant->jit_cs_thread_data_ptr_type; /* per thread data */ 118 arg_types[12] = int32_type; /* coro only - num X loops */ 119 arg_types[13] = int32_type; /* coro only - partials */ 120 arg_types[14] = int32_type; /* coro block_x_size */ 121 arg_types[15] = int32_type; /* coro block_y_size */ 122 arg_types[16] = int32_type; /* coro block_z_size */ 123 arg_types[17] = int32_type; /* coro idx */ 124 arg_types[18] = LLVMPointerType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), 0); 125 func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), 126 arg_types, ARRAY_SIZE(arg_types) - 7, 0); 127 128 coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), 129 arg_types, ARRAY_SIZE(arg_types), 0); 130 131 function = LLVMAddFunction(gallivm->module, func_name, func_type); 132 LLVMSetFunctionCallConv(function, LLVMCCallConv); 133 134 coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type); 135 LLVMSetFunctionCallConv(coro, LLVMCCallConv); 136 137 variant->function = function; 138 139 for(i = 0; i < ARRAY_SIZE(arg_types); ++i) { 140 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) { 141 lp_add_function_attr(coro, i + 1, LP_FUNC_ATTR_NOALIAS); 142 if (i < ARRAY_SIZE(arg_types) - 7) 143 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); 144 } 145 } 146 147 lp_build_coro_declare_malloc_hooks(gallivm); 148 149 if (variant->gallivm->cache->data_size) 150 return; 151 152 context_ptr = LLVMGetParam(function, 0); 153 x_size_arg = LLVMGetParam(function, 1); 154 y_size_arg = LLVMGetParam(function, 2); 155 z_size_arg = LLVMGetParam(function, 3); 156 grid_x_arg = LLVMGetParam(function, 4); 157 grid_y_arg = LLVMGetParam(function, 5); 158 grid_z_arg = LLVMGetParam(function, 6); 159 grid_size_x_arg = LLVMGetParam(function, 7); 160 grid_size_y_arg = LLVMGetParam(function, 8); 161 grid_size_z_arg = LLVMGetParam(function, 9); 162 work_dim_arg = LLVMGetParam(function, 10); 163 thread_data_ptr = LLVMGetParam(function, 11); 164 165 lp_build_name(context_ptr, "context"); 166 lp_build_name(x_size_arg, "x_size"); 167 lp_build_name(y_size_arg, "y_size"); 168 lp_build_name(z_size_arg, "z_size"); 169 lp_build_name(grid_x_arg, "grid_x"); 170 lp_build_name(grid_y_arg, "grid_y"); 171 lp_build_name(grid_z_arg, "grid_z"); 172 lp_build_name(grid_size_x_arg, "grid_size_x"); 173 lp_build_name(grid_size_y_arg, "grid_size_y"); 174 lp_build_name(grid_size_z_arg, "grid_size_z"); 175 lp_build_name(work_dim_arg, "work_dim"); 176 lp_build_name(thread_data_ptr, "thread_data"); 177 178 block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry"); 179 builder = gallivm->builder; 180 assert(builder); 181 LLVMPositionBuilderAtEnd(builder, block); 182 sampler = lp_llvm_sampler_soa_create(lp_cs_variant_key_samplers(key), key->nr_samplers); 183 image = lp_llvm_image_soa_create(lp_cs_variant_key_images(key), key->nr_images); 184 185 struct lp_build_loop_state loop_state[4]; 186 LLVMValueRef num_x_loop; 187 LLVMValueRef vec_length = lp_build_const_int32(gallivm, cs_type.length); 188 num_x_loop = LLVMBuildAdd(gallivm->builder, x_size_arg, vec_length, ""); 189 num_x_loop = LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), ""); 190 num_x_loop = LLVMBuildUDiv(gallivm->builder, num_x_loop, vec_length, ""); 191 LLVMValueRef partials = LLVMBuildURem(gallivm->builder, x_size_arg, vec_length, ""); 192 193 LLVMValueRef coro_num_hdls = LLVMBuildMul(gallivm->builder, num_x_loop, y_size_arg, ""); 194 coro_num_hdls = LLVMBuildMul(gallivm->builder, coro_num_hdls, z_size_arg, ""); 195 196 /* build a ptr in memory to store all the frames in later. */ 197 LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); 198 LLVMValueRef coro_mem = LLVMBuildAlloca(gallivm->builder, hdl_ptr_type, "coro_mem"); 199 LLVMBuildStore(builder, LLVMConstNull(hdl_ptr_type), coro_mem); 200 201 LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, coro_num_hdls, "coro_hdls"); 202 203 unsigned end_coroutine = INT_MAX; 204 205 /* 206 * This is the main coroutine execution loop. It iterates over the dimensions 207 * and calls the coroutine main entrypoint on the first pass, but in subsequent 208 * passes it checks if the coroutine has completed and resumes it if not. 209 */ 210 /* take x_width - round up to type.length width */ 211 lp_build_loop_begin(&loop_state[3], gallivm, 212 lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */ 213 lp_build_loop_begin(&loop_state[2], gallivm, 214 lp_build_const_int32(gallivm, 0)); /* z loop */ 215 lp_build_loop_begin(&loop_state[1], gallivm, 216 lp_build_const_int32(gallivm, 0)); /* y loop */ 217 lp_build_loop_begin(&loop_state[0], gallivm, 218 lp_build_const_int32(gallivm, 0)); /* x loop */ 219 { 220 LLVMValueRef args[19]; 221 args[0] = context_ptr; 222 args[1] = loop_state[0].counter; 223 args[2] = loop_state[1].counter; 224 args[3] = loop_state[2].counter; 225 args[4] = grid_x_arg; 226 args[5] = grid_y_arg; 227 args[6] = grid_z_arg; 228 args[7] = grid_size_x_arg; 229 args[8] = grid_size_y_arg; 230 args[9] = grid_size_z_arg; 231 args[10] = work_dim_arg; 232 args[11] = thread_data_ptr; 233 args[12] = num_x_loop; 234 args[13] = partials; 235 args[14] = x_size_arg; 236 args[15] = y_size_arg; 237 args[16] = z_size_arg; 238 239 /* idx = (z * (size_x * size_y) + y * size_x + x */ 240 LLVMValueRef coro_hdl_idx = LLVMBuildMul(gallivm->builder, loop_state[2].counter, 241 LLVMBuildMul(gallivm->builder, num_x_loop, y_size_arg, ""), ""); 242 coro_hdl_idx = LLVMBuildAdd(gallivm->builder, coro_hdl_idx, 243 LLVMBuildMul(gallivm->builder, loop_state[1].counter, 244 num_x_loop, ""), ""); 245 coro_hdl_idx = LLVMBuildAdd(gallivm->builder, coro_hdl_idx, 246 loop_state[0].counter, ""); 247 248 args[17] = coro_hdl_idx; 249 250 args[18] = coro_mem; 251 LLVMValueRef coro_entry = LLVMBuildGEP(gallivm->builder, coro_hdls, &coro_hdl_idx, 1, ""); 252 253 LLVMValueRef coro_hdl = LLVMBuildLoad(gallivm->builder, coro_entry, "coro_hdl"); 254 255 struct lp_build_if_state ifstate; 256 LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[3].counter, 257 lp_build_const_int32(gallivm, 0), ""); 258 /* first time here - call the coroutine function entry point */ 259 lp_build_if(&ifstate, gallivm, cmp); 260 LLVMValueRef coro_ret = LLVMBuildCall(gallivm->builder, coro, args, 19, ""); 261 LLVMBuildStore(gallivm->builder, coro_ret, coro_entry); 262 lp_build_else(&ifstate); 263 /* subsequent calls for this invocation - check if done. */ 264 LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl); 265 struct lp_build_if_state ifstate2; 266 lp_build_if(&ifstate2, gallivm, coro_done); 267 /* if done destroy and force loop exit */ 268 lp_build_coro_destroy(gallivm, coro_hdl); 269 lp_build_loop_force_set_counter(&loop_state[3], lp_build_const_int32(gallivm, end_coroutine - 1)); 270 lp_build_else(&ifstate2); 271 /* otherwise resume the coroutine */ 272 lp_build_coro_resume(gallivm, coro_hdl); 273 lp_build_endif(&ifstate2); 274 lp_build_endif(&ifstate); 275 lp_build_loop_force_reload_counter(&loop_state[3]); 276 } 277 lp_build_loop_end_cond(&loop_state[0], 278 num_x_loop, 279 NULL, LLVMIntUGE); 280 lp_build_loop_end_cond(&loop_state[1], 281 y_size_arg, 282 NULL, LLVMIntUGE); 283 lp_build_loop_end_cond(&loop_state[2], 284 z_size_arg, 285 NULL, LLVMIntUGE); 286 lp_build_loop_end_cond(&loop_state[3], 287 lp_build_const_int32(gallivm, end_coroutine), 288 NULL, LLVMIntEQ); 289 290 LLVMValueRef coro_mem_ptr = LLVMBuildLoad(builder, coro_mem, ""); 291 LLVMBuildCall(gallivm->builder, gallivm->coro_free_hook, &coro_mem_ptr, 1, ""); 292 293 LLVMBuildRetVoid(builder); 294 295 /* This is stage (b) - generate the compute shader code inside the coroutine. */ 296 LLVMValueRef block_x_size_arg, block_y_size_arg, block_z_size_arg; 297 context_ptr = LLVMGetParam(coro, 0); 298 x_size_arg = LLVMGetParam(coro, 1); 299 y_size_arg = LLVMGetParam(coro, 2); 300 z_size_arg = LLVMGetParam(coro, 3); 301 grid_x_arg = LLVMGetParam(coro, 4); 302 grid_y_arg = LLVMGetParam(coro, 5); 303 grid_z_arg = LLVMGetParam(coro, 6); 304 grid_size_x_arg = LLVMGetParam(coro, 7); 305 grid_size_y_arg = LLVMGetParam(coro, 8); 306 grid_size_z_arg = LLVMGetParam(coro, 9); 307 work_dim_arg = LLVMGetParam(coro, 10); 308 thread_data_ptr = LLVMGetParam(coro, 11); 309 num_x_loop = LLVMGetParam(coro, 12); 310 partials = LLVMGetParam(coro, 13); 311 block_x_size_arg = LLVMGetParam(coro, 14); 312 block_y_size_arg = LLVMGetParam(coro, 15); 313 block_z_size_arg = LLVMGetParam(coro, 16); 314 LLVMValueRef coro_idx = LLVMGetParam(coro, 17); 315 coro_mem = LLVMGetParam(coro, 18); 316 block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "entry"); 317 LLVMPositionBuilderAtEnd(builder, block); 318 { 319 LLVMValueRef consts_ptr, num_consts_ptr; 320 LLVMValueRef ssbo_ptr, num_ssbo_ptr; 321 LLVMValueRef shared_ptr; 322 LLVMValueRef kernel_args_ptr; 323 struct lp_build_mask_context mask; 324 struct lp_bld_tgsi_system_values system_values; 325 326 memset(&system_values, 0, sizeof(system_values)); 327 consts_ptr = lp_jit_cs_context_constants(gallivm, context_ptr); 328 num_consts_ptr = lp_jit_cs_context_num_constants(gallivm, context_ptr); 329 ssbo_ptr = lp_jit_cs_context_ssbos(gallivm, context_ptr); 330 num_ssbo_ptr = lp_jit_cs_context_num_ssbos(gallivm, context_ptr); 331 kernel_args_ptr = lp_jit_cs_context_kernel_args(gallivm, context_ptr); 332 333 shared_ptr = lp_jit_cs_thread_data_shared(gallivm, thread_data_ptr); 334 335 LLVMValueRef coro_num_hdls = LLVMBuildMul(gallivm->builder, num_x_loop, block_y_size_arg, ""); 336 coro_num_hdls = LLVMBuildMul(gallivm->builder, coro_num_hdls, block_z_size_arg, ""); 337 338 /* these are coroutine entrypoint necessities */ 339 LLVMValueRef coro_id = lp_build_coro_id(gallivm); 340 LLVMValueRef coro_entry = lp_build_coro_alloc_mem_array(gallivm, coro_mem, coro_idx, coro_num_hdls); 341 342 LLVMValueRef alloced_ptr = LLVMBuildLoad(gallivm->builder, coro_mem, ""); 343 alloced_ptr = LLVMBuildGEP(gallivm->builder, alloced_ptr, &coro_entry, 1, ""); 344 LLVMValueRef coro_hdl = lp_build_coro_begin(gallivm, coro_id, alloced_ptr); 345 LLVMValueRef has_partials = LLVMBuildICmp(gallivm->builder, LLVMIntNE, partials, lp_build_const_int32(gallivm, 0), ""); 346 LLVMValueRef tid_vals[3]; 347 LLVMValueRef tids_x[LP_MAX_VECTOR_LENGTH], tids_y[LP_MAX_VECTOR_LENGTH], tids_z[LP_MAX_VECTOR_LENGTH]; 348 LLVMValueRef base_val = LLVMBuildMul(gallivm->builder, x_size_arg, vec_length, ""); 349 for (i = 0; i < cs_type.length; i++) { 350 tids_x[i] = LLVMBuildAdd(gallivm->builder, base_val, lp_build_const_int32(gallivm, i), ""); 351 tids_y[i] = y_size_arg; 352 tids_z[i] = z_size_arg; 353 } 354 tid_vals[0] = lp_build_gather_values(gallivm, tids_x, cs_type.length); 355 tid_vals[1] = lp_build_gather_values(gallivm, tids_y, cs_type.length); 356 tid_vals[2] = lp_build_gather_values(gallivm, tids_z, cs_type.length); 357 system_values.thread_id = LLVMGetUndef(LLVMArrayType(LLVMVectorType(int32_type, cs_type.length), 3)); 358 for (i = 0; i < 3; i++) 359 system_values.thread_id = LLVMBuildInsertValue(builder, system_values.thread_id, tid_vals[i], i, ""); 360 361 LLVMValueRef gtids[3] = { grid_x_arg, grid_y_arg, grid_z_arg }; 362 system_values.block_id = LLVMGetUndef(LLVMVectorType(int32_type, 3)); 363 for (i = 0; i < 3; i++) 364 system_values.block_id = LLVMBuildInsertElement(builder, system_values.block_id, gtids[i], lp_build_const_int32(gallivm, i), ""); 365 366 LLVMValueRef gstids[3] = { grid_size_x_arg, grid_size_y_arg, grid_size_z_arg }; 367 system_values.grid_size = LLVMGetUndef(LLVMVectorType(int32_type, 3)); 368 for (i = 0; i < 3; i++) 369 system_values.grid_size = LLVMBuildInsertElement(builder, system_values.grid_size, gstids[i], lp_build_const_int32(gallivm, i), ""); 370 371 system_values.work_dim = work_dim_arg; 372 373 system_values.subgroup_id = coro_idx; 374 system_values.num_subgroups = LLVMBuildMul(builder, num_x_loop, 375 LLVMBuildMul(builder, block_y_size_arg, block_z_size_arg, ""), ""); 376 377 LLVMValueRef bsize[3] = { block_x_size_arg, block_y_size_arg, block_z_size_arg }; 378 system_values.block_size = LLVMGetUndef(LLVMVectorType(int32_type, 3)); 379 for (i = 0; i < 3; i++) 380 system_values.block_size = LLVMBuildInsertElement(builder, system_values.block_size, bsize[i], lp_build_const_int32(gallivm, i), ""); 381 382 LLVMValueRef last_x_loop = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, x_size_arg, LLVMBuildSub(gallivm->builder, num_x_loop, lp_build_const_int32(gallivm, 1), ""), ""); 383 LLVMValueRef use_partial_mask = LLVMBuildAnd(gallivm->builder, last_x_loop, has_partials, ""); 384 struct lp_build_if_state if_state; 385 LLVMValueRef mask_val = lp_build_alloca(gallivm, LLVMVectorType(int32_type, cs_type.length), "mask"); 386 LLVMValueRef full_mask_val = lp_build_const_int_vec(gallivm, cs_type, ~0); 387 LLVMBuildStore(gallivm->builder, full_mask_val, mask_val); 388 389 lp_build_if(&if_state, gallivm, use_partial_mask); 390 struct lp_build_loop_state mask_loop_state; 391 lp_build_loop_begin(&mask_loop_state, gallivm, partials); 392 LLVMValueRef tmask_val = LLVMBuildLoad(gallivm->builder, mask_val, ""); 393 tmask_val = LLVMBuildInsertElement(gallivm->builder, tmask_val, lp_build_const_int32(gallivm, 0), mask_loop_state.counter, ""); 394 LLVMBuildStore(gallivm->builder, tmask_val, mask_val); 395 lp_build_loop_end_cond(&mask_loop_state, vec_length, NULL, LLVMIntUGE); 396 lp_build_endif(&if_state); 397 398 mask_val = LLVMBuildLoad(gallivm->builder, mask_val, ""); 399 lp_build_mask_begin(&mask, gallivm, cs_type, mask_val); 400 401 struct lp_build_coro_suspend_info coro_info; 402 403 LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "suspend"); 404 LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "cleanup"); 405 406 coro_info.suspend = sus_block; 407 coro_info.cleanup = clean_block; 408 409 struct lp_build_tgsi_params params; 410 memset(¶ms, 0, sizeof(params)); 411 412 params.type = cs_type; 413 params.mask = &mask; 414 params.consts_ptr = consts_ptr; 415 params.const_sizes_ptr = num_consts_ptr; 416 params.system_values = &system_values; 417 params.context_ptr = context_ptr; 418 params.sampler = sampler; 419 params.info = &shader->info.base; 420 params.ssbo_ptr = ssbo_ptr; 421 params.ssbo_sizes_ptr = num_ssbo_ptr; 422 params.image = image; 423 params.shared_ptr = shared_ptr; 424 params.coro = &coro_info; 425 params.kernel_args = kernel_args_ptr; 426 params.aniso_filter_table = lp_jit_cs_context_aniso_filter_table(gallivm, context_ptr); 427 428 if (shader->base.type == PIPE_SHADER_IR_TGSI) 429 lp_build_tgsi_soa(gallivm, shader->base.tokens, ¶ms, NULL); 430 else 431 lp_build_nir_soa(gallivm, shader->base.ir.nir, ¶ms, 432 NULL); 433 434 mask_val = lp_build_mask_end(&mask); 435 436 lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true); 437 LLVMPositionBuilderAtEnd(builder, clean_block); 438 439 LLVMBuildBr(builder, sus_block); 440 LLVMPositionBuilderAtEnd(builder, sus_block); 441 442 lp_build_coro_end(gallivm, coro_hdl); 443 LLVMBuildRet(builder, coro_hdl); 444 } 445 446 sampler->destroy(sampler); 447 image->destroy(image); 448 449 gallivm_verify_function(gallivm, coro); 450 gallivm_verify_function(gallivm, function); 451} 452 453static void * 454llvmpipe_create_compute_state(struct pipe_context *pipe, 455 const struct pipe_compute_state *templ) 456{ 457 struct lp_compute_shader *shader; 458 int nr_samplers, nr_sampler_views; 459 460 shader = CALLOC_STRUCT(lp_compute_shader); 461 if (!shader) 462 return NULL; 463 464 shader->no = cs_no++; 465 466 shader->base.type = templ->ir_type; 467 shader->req_local_mem = templ->req_local_mem; 468 if (templ->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) { 469 struct blob_reader reader; 470 const struct pipe_binary_program_header *hdr = templ->prog; 471 472 blob_reader_init(&reader, hdr->blob, hdr->num_bytes); 473 shader->base.ir.nir = nir_deserialize(NULL, pipe->screen->get_compiler_options(pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE), &reader); 474 shader->base.type = PIPE_SHADER_IR_NIR; 475 476 pipe->screen->finalize_nir(pipe->screen, shader->base.ir.nir); 477 shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size; 478 } else if (templ->ir_type == PIPE_SHADER_IR_NIR) { 479 shader->base.ir.nir = (struct nir_shader *)templ->prog; 480 shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size; 481 } 482 if (shader->base.type == PIPE_SHADER_IR_TGSI) { 483 /* get/save the summary info for this shader */ 484 lp_build_tgsi_info(templ->prog, &shader->info); 485 486 /* we need to keep a local copy of the tokens */ 487 shader->base.tokens = tgsi_dup_tokens(templ->prog); 488 } else { 489 nir_tgsi_scan_shader(shader->base.ir.nir, &shader->info.base, false); 490 } 491 492 make_empty_list(&shader->variants); 493 494 nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; 495 nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; 496 int nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1; 497 shader->variant_key_size = lp_cs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images); 498 499 return shader; 500} 501 502static void 503llvmpipe_bind_compute_state(struct pipe_context *pipe, 504 void *cs) 505{ 506 struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); 507 508 if (llvmpipe->cs == cs) 509 return; 510 511 llvmpipe->cs = (struct lp_compute_shader *)cs; 512 llvmpipe->cs_dirty |= LP_CSNEW_CS; 513} 514 515/** 516 * Remove shader variant from two lists: the shader's variant list 517 * and the context's variant list. 518 */ 519static void 520llvmpipe_remove_cs_shader_variant(struct llvmpipe_context *lp, 521 struct lp_compute_shader_variant *variant) 522{ 523 if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) { 524 debug_printf("llvmpipe: del cs #%u var %u v created %u v cached %u " 525 "v total cached %u inst %u total inst %u\n", 526 variant->shader->no, variant->no, 527 variant->shader->variants_created, 528 variant->shader->variants_cached, 529 lp->nr_cs_variants, variant->nr_instrs, lp->nr_cs_instrs); 530 } 531 532 gallivm_destroy(variant->gallivm); 533 534 /* remove from shader's list */ 535 remove_from_list(&variant->list_item_local); 536 variant->shader->variants_cached--; 537 538 /* remove from context's list */ 539 remove_from_list(&variant->list_item_global); 540 lp->nr_cs_variants--; 541 lp->nr_cs_instrs -= variant->nr_instrs; 542 543 FREE(variant); 544} 545 546static void 547llvmpipe_delete_compute_state(struct pipe_context *pipe, 548 void *cs) 549{ 550 struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); 551 struct lp_compute_shader *shader = cs; 552 struct lp_cs_variant_list_item *li; 553 554 if (llvmpipe->cs == cs) 555 llvmpipe->cs = NULL; 556 for (unsigned i = 0; i < shader->max_global_buffers; i++) 557 pipe_resource_reference(&shader->global_buffers[i], NULL); 558 FREE(shader->global_buffers); 559 560 /* Delete all the variants */ 561 li = first_elem(&shader->variants); 562 while(!at_end(&shader->variants, li)) { 563 struct lp_cs_variant_list_item *next = next_elem(li); 564 llvmpipe_remove_cs_shader_variant(llvmpipe, li->base); 565 li = next; 566 } 567 if (shader->base.ir.nir) 568 ralloc_free(shader->base.ir.nir); 569 tgsi_free_tokens(shader->base.tokens); 570 FREE(shader); 571} 572 573static struct lp_compute_shader_variant_key * 574make_variant_key(struct llvmpipe_context *lp, 575 struct lp_compute_shader *shader, 576 char *store) 577{ 578 int i; 579 struct lp_compute_shader_variant_key *key; 580 key = (struct lp_compute_shader_variant_key *)store; 581 memset(key, 0, sizeof(*key)); 582 583 /* This value will be the same for all the variants of a given shader: 584 */ 585 key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; 586 587 struct lp_sampler_static_state *cs_sampler; 588 589 cs_sampler = lp_cs_variant_key_samplers(key); 590 591 memset(cs_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *cs_sampler); 592 for(i = 0; i < key->nr_samplers; ++i) { 593 if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 594 lp_sampler_static_sampler_state(&cs_sampler[i].sampler_state, 595 lp->samplers[PIPE_SHADER_COMPUTE][i]); 596 } 597 } 598 599 /* 600 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes 601 * are dx10-style? Can't really have mixed opcodes, at least not 602 * if we want to skip the holes here (without rescanning tgsi). 603 */ 604 if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { 605 key->nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; 606 for(i = 0; i < key->nr_sampler_views; ++i) { 607 /* 608 * Note sview may exceed what's representable by file_mask. 609 * This will still work, the only downside is that not actually 610 * used views may be included in the shader key. 611 */ 612 if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) { 613 lp_sampler_static_texture_state(&cs_sampler[i].texture_state, 614 lp->sampler_views[PIPE_SHADER_COMPUTE][i]); 615 } 616 } 617 } 618 else { 619 key->nr_sampler_views = key->nr_samplers; 620 for(i = 0; i < key->nr_sampler_views; ++i) { 621 if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 622 lp_sampler_static_texture_state(&cs_sampler[i].texture_state, 623 lp->sampler_views[PIPE_SHADER_COMPUTE][i]); 624 } 625 } 626 } 627 628 struct lp_image_static_state *lp_image; 629 lp_image = lp_cs_variant_key_images(key); 630 key->nr_images = shader->info.base.file_max[TGSI_FILE_IMAGE] + 1; 631 for (i = 0; i < key->nr_images; ++i) { 632 if (shader->info.base.file_mask[TGSI_FILE_IMAGE] & (1 << i)) { 633 lp_sampler_static_texture_state_image(&lp_image[i].image_state, 634 &lp->images[PIPE_SHADER_COMPUTE][i]); 635 } 636 } 637 return key; 638} 639 640static void 641dump_cs_variant_key(const struct lp_compute_shader_variant_key *key) 642{ 643 int i; 644 debug_printf("cs variant %p:\n", (void *) key); 645 646 for (i = 0; i < key->nr_samplers; ++i) { 647 const struct lp_sampler_static_state *samplers = lp_cs_variant_key_samplers(key); 648 const struct lp_static_sampler_state *sampler = &samplers[i].sampler_state; 649 debug_printf("sampler[%u] = \n", i); 650 debug_printf(" .wrap = %s %s %s\n", 651 util_str_tex_wrap(sampler->wrap_s, TRUE), 652 util_str_tex_wrap(sampler->wrap_t, TRUE), 653 util_str_tex_wrap(sampler->wrap_r, TRUE)); 654 debug_printf(" .min_img_filter = %s\n", 655 util_str_tex_filter(sampler->min_img_filter, TRUE)); 656 debug_printf(" .min_mip_filter = %s\n", 657 util_str_tex_mipfilter(sampler->min_mip_filter, TRUE)); 658 debug_printf(" .mag_img_filter = %s\n", 659 util_str_tex_filter(sampler->mag_img_filter, TRUE)); 660 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) 661 debug_printf(" .compare_func = %s\n", util_str_func(sampler->compare_func, TRUE)); 662 debug_printf(" .normalized_coords = %u\n", sampler->normalized_coords); 663 debug_printf(" .min_max_lod_equal = %u\n", sampler->min_max_lod_equal); 664 debug_printf(" .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero); 665 debug_printf(" .apply_min_lod = %u\n", sampler->apply_min_lod); 666 debug_printf(" .apply_max_lod = %u\n", sampler->apply_max_lod); 667 debug_printf(" .aniso = %u\n", sampler->aniso); 668 } 669 for (i = 0; i < key->nr_sampler_views; ++i) { 670 const struct lp_sampler_static_state *samplers = lp_cs_variant_key_samplers(key); 671 const struct lp_static_texture_state *texture = &samplers[i].texture_state; 672 debug_printf("texture[%u] = \n", i); 673 debug_printf(" .format = %s\n", 674 util_format_name(texture->format)); 675 debug_printf(" .target = %s\n", 676 util_str_tex_target(texture->target, TRUE)); 677 debug_printf(" .level_zero_only = %u\n", 678 texture->level_zero_only); 679 debug_printf(" .pot = %u %u %u\n", 680 texture->pot_width, 681 texture->pot_height, 682 texture->pot_depth); 683 } 684 struct lp_image_static_state *images = lp_cs_variant_key_images(key); 685 for (i = 0; i < key->nr_images; ++i) { 686 const struct lp_static_texture_state *image = &images[i].image_state; 687 debug_printf("image[%u] = \n", i); 688 debug_printf(" .format = %s\n", 689 util_format_name(image->format)); 690 debug_printf(" .target = %s\n", 691 util_str_tex_target(image->target, TRUE)); 692 debug_printf(" .level_zero_only = %u\n", 693 image->level_zero_only); 694 debug_printf(" .pot = %u %u %u\n", 695 image->pot_width, 696 image->pot_height, 697 image->pot_depth); 698 } 699} 700 701static void 702lp_debug_cs_variant(const struct lp_compute_shader_variant *variant) 703{ 704 debug_printf("llvmpipe: Compute shader #%u variant #%u:\n", 705 variant->shader->no, variant->no); 706 if (variant->shader->base.type == PIPE_SHADER_IR_TGSI) 707 tgsi_dump(variant->shader->base.tokens, 0); 708 else 709 nir_print_shader(variant->shader->base.ir.nir, stderr); 710 dump_cs_variant_key(&variant->key); 711 debug_printf("\n"); 712} 713 714static void 715lp_cs_get_ir_cache_key(struct lp_compute_shader_variant *variant, 716 unsigned char ir_sha1_cache_key[20]) 717{ 718 struct blob blob = { 0 }; 719 unsigned ir_size; 720 void *ir_binary; 721 722 blob_init(&blob); 723 nir_serialize(&blob, variant->shader->base.ir.nir, true); 724 ir_binary = blob.data; 725 ir_size = blob.size; 726 727 struct mesa_sha1 ctx; 728 _mesa_sha1_init(&ctx); 729 _mesa_sha1_update(&ctx, &variant->key, variant->shader->variant_key_size); 730 _mesa_sha1_update(&ctx, ir_binary, ir_size); 731 _mesa_sha1_final(&ctx, ir_sha1_cache_key); 732 733 blob_finish(&blob); 734} 735 736static struct lp_compute_shader_variant * 737generate_variant(struct llvmpipe_context *lp, 738 struct lp_compute_shader *shader, 739 const struct lp_compute_shader_variant_key *key) 740{ 741 struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); 742 struct lp_compute_shader_variant *variant; 743 char module_name[64]; 744 unsigned char ir_sha1_cache_key[20]; 745 struct lp_cached_code cached = { 0 }; 746 bool needs_caching = false; 747 variant = MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key); 748 if (!variant) 749 return NULL; 750 751 memset(variant, 0, sizeof(*variant)); 752 snprintf(module_name, sizeof(module_name), "cs%u_variant%u", 753 shader->no, shader->variants_created); 754 755 variant->shader = shader; 756 memcpy(&variant->key, key, shader->variant_key_size); 757 758 if (shader->base.ir.nir) { 759 lp_cs_get_ir_cache_key(variant, ir_sha1_cache_key); 760 761 lp_disk_cache_find_shader(screen, &cached, ir_sha1_cache_key); 762 if (!cached.data_size) 763 needs_caching = true; 764 } 765 variant->gallivm = gallivm_create(module_name, lp->context, &cached); 766 if (!variant->gallivm) { 767 FREE(variant); 768 return NULL; 769 } 770 771 variant->list_item_global.base = variant; 772 variant->list_item_local.base = variant; 773 variant->no = shader->variants_created++; 774 775 776 777 if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) { 778 lp_debug_cs_variant(variant); 779 } 780 781 lp_jit_init_cs_types(variant); 782 783 generate_compute(lp, shader, variant); 784 785 gallivm_compile_module(variant->gallivm); 786 787 lp_build_coro_add_malloc_hooks(variant->gallivm); 788 variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module); 789 790 variant->jit_function = (lp_jit_cs_func)gallivm_jit_function(variant->gallivm, variant->function); 791 792 if (needs_caching) { 793 lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key); 794 } 795 gallivm_free_ir(variant->gallivm); 796 return variant; 797} 798 799static void 800lp_cs_ctx_set_cs_variant( struct lp_cs_context *csctx, 801 struct lp_compute_shader_variant *variant) 802{ 803 csctx->cs.current.variant = variant; 804} 805 806static void 807llvmpipe_update_cs(struct llvmpipe_context *lp) 808{ 809 struct lp_compute_shader *shader = lp->cs; 810 811 struct lp_compute_shader_variant_key *key; 812 struct lp_compute_shader_variant *variant = NULL; 813 struct lp_cs_variant_list_item *li; 814 char store[LP_CS_MAX_VARIANT_KEY_SIZE]; 815 816 key = make_variant_key(lp, shader, store); 817 818 /* Search the variants for one which matches the key */ 819 li = first_elem(&shader->variants); 820 while(!at_end(&shader->variants, li)) { 821 if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) { 822 variant = li->base; 823 break; 824 } 825 li = next_elem(li); 826 } 827 828 if (variant) { 829 /* Move this variant to the head of the list to implement LRU 830 * deletion of shader's when we have too many. 831 */ 832 move_to_head(&lp->cs_variants_list, &variant->list_item_global); 833 } 834 else { 835 /* variant not found, create it now */ 836 int64_t t0, t1, dt; 837 unsigned i; 838 unsigned variants_to_cull; 839 840 if (LP_DEBUG & DEBUG_CS) { 841 debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n", 842 lp->nr_cs_variants, 843 lp->nr_cs_instrs, 844 lp->nr_cs_variants ? lp->nr_cs_instrs / lp->nr_cs_variants : 0); 845 } 846 847 /* First, check if we've exceeded the max number of shader variants. 848 * If so, free 6.25% of them (the least recently used ones). 849 */ 850 variants_to_cull = lp->nr_cs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 16 : 0; 851 852 if (variants_to_cull || 853 lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) { 854 if (gallivm_debug & GALLIVM_DEBUG_PERF) { 855 debug_printf("Evicting CS: %u cs variants,\t%u total variants," 856 "\t%u instrs,\t%u instrs/variant\n", 857 shader->variants_cached, 858 lp->nr_cs_variants, lp->nr_cs_instrs, 859 lp->nr_cs_instrs / lp->nr_cs_variants); 860 } 861 862 /* 863 * We need to re-check lp->nr_cs_variants because an arbitrarily large 864 * number of shader variants (potentially all of them) could be 865 * pending for destruction on flush. 866 */ 867 868 for (i = 0; i < variants_to_cull || lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS; i++) { 869 struct lp_cs_variant_list_item *item; 870 if (is_empty_list(&lp->cs_variants_list)) { 871 break; 872 } 873 item = last_elem(&lp->cs_variants_list); 874 assert(item); 875 assert(item->base); 876 llvmpipe_remove_cs_shader_variant(lp, item->base); 877 } 878 } 879 /* 880 * Generate the new variant. 881 */ 882 t0 = os_time_get(); 883 variant = generate_variant(lp, shader, key); 884 t1 = os_time_get(); 885 dt = t1 - t0; 886 LP_COUNT_ADD(llvm_compile_time, dt); 887 LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */ 888 889 /* Put the new variant into the list */ 890 if (variant) { 891 insert_at_head(&shader->variants, &variant->list_item_local); 892 insert_at_head(&lp->cs_variants_list, &variant->list_item_global); 893 lp->nr_cs_variants++; 894 lp->nr_cs_instrs += variant->nr_instrs; 895 shader->variants_cached++; 896 } 897 } 898 /* Bind this variant */ 899 lp_cs_ctx_set_cs_variant(lp->csctx, variant); 900} 901 902/** 903 * Called during state validation when LP_CSNEW_SAMPLER_VIEW is set. 904 */ 905static void 906lp_csctx_set_sampler_views(struct lp_cs_context *csctx, 907 unsigned num, 908 struct pipe_sampler_view **views) 909{ 910 unsigned i, max_tex_num; 911 912 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); 913 914 assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS); 915 916 max_tex_num = MAX2(num, csctx->cs.current_tex_num); 917 918 for (i = 0; i < max_tex_num; i++) { 919 struct pipe_sampler_view *view = i < num ? views[i] : NULL; 920 921 /* We are going to overwrite/unref the current texture further below. If 922 * set, make sure to unmap its resource to avoid leaking previous 923 * mapping. */ 924 if (csctx->cs.current_tex[i]) 925 llvmpipe_resource_unmap(csctx->cs.current_tex[i], 0, 0); 926 927 if (view) { 928 struct pipe_resource *res = view->texture; 929 struct llvmpipe_resource *lp_tex = llvmpipe_resource(res); 930 struct lp_jit_texture *jit_tex; 931 jit_tex = &csctx->cs.current.jit_context.textures[i]; 932 933 /* We're referencing the texture's internal data, so save a 934 * reference to it. 935 */ 936 pipe_resource_reference(&csctx->cs.current_tex[i], res); 937 938 if (!lp_tex->dt) { 939 /* regular texture - csctx array of mipmap level offsets */ 940 int j; 941 unsigned first_level = 0; 942 unsigned last_level = 0; 943 944 if (llvmpipe_resource_is_texture(res)) { 945 first_level = view->u.tex.first_level; 946 last_level = view->u.tex.last_level; 947 assert(first_level <= last_level); 948 assert(last_level <= res->last_level); 949 jit_tex->base = lp_tex->tex_data; 950 } 951 else { 952 jit_tex->base = lp_tex->data; 953 } 954 if (LP_PERF & PERF_TEX_MEM) { 955 /* use dummy tile memory */ 956 jit_tex->base = lp_dummy_tile; 957 jit_tex->width = TILE_SIZE/8; 958 jit_tex->height = TILE_SIZE/8; 959 jit_tex->depth = 1; 960 jit_tex->first_level = 0; 961 jit_tex->last_level = 0; 962 jit_tex->mip_offsets[0] = 0; 963 jit_tex->row_stride[0] = 0; 964 jit_tex->img_stride[0] = 0; 965 jit_tex->num_samples = 0; 966 jit_tex->sample_stride = 0; 967 } 968 else { 969 jit_tex->width = res->width0; 970 jit_tex->height = res->height0; 971 jit_tex->depth = res->depth0; 972 jit_tex->first_level = first_level; 973 jit_tex->last_level = last_level; 974 jit_tex->num_samples = res->nr_samples; 975 jit_tex->sample_stride = 0; 976 977 if (llvmpipe_resource_is_texture(res)) { 978 for (j = first_level; j <= last_level; j++) { 979 jit_tex->mip_offsets[j] = lp_tex->mip_offsets[j]; 980 jit_tex->row_stride[j] = lp_tex->row_stride[j]; 981 jit_tex->img_stride[j] = lp_tex->img_stride[j]; 982 } 983 jit_tex->sample_stride = lp_tex->sample_stride; 984 985 if (res->target == PIPE_TEXTURE_1D_ARRAY || 986 res->target == PIPE_TEXTURE_2D_ARRAY || 987 res->target == PIPE_TEXTURE_CUBE || 988 res->target == PIPE_TEXTURE_CUBE_ARRAY) { 989 /* 990 * For array textures, we don't have first_layer, instead 991 * adjust last_layer (stored as depth) plus the mip level offsets 992 * (as we have mip-first layout can't just adjust base ptr). 993 * XXX For mip levels, could do something similar. 994 */ 995 jit_tex->depth = view->u.tex.last_layer - view->u.tex.first_layer + 1; 996 for (j = first_level; j <= last_level; j++) { 997 jit_tex->mip_offsets[j] += view->u.tex.first_layer * 998 lp_tex->img_stride[j]; 999 } 1000 if (view->target == PIPE_TEXTURE_CUBE || 1001 view->target == PIPE_TEXTURE_CUBE_ARRAY) { 1002 assert(jit_tex->depth % 6 == 0); 1003 } 1004 assert(view->u.tex.first_layer <= view->u.tex.last_layer); 1005 assert(view->u.tex.last_layer < res->array_size); 1006 } 1007 } 1008 else { 1009 /* 1010 * For buffers, we don't have "offset", instead adjust 1011 * the size (stored as width) plus the base pointer. 1012 */ 1013 unsigned view_blocksize = util_format_get_blocksize(view->format); 1014 /* probably don't really need to fill that out */ 1015 jit_tex->mip_offsets[0] = 0; 1016 jit_tex->row_stride[0] = 0; 1017 jit_tex->img_stride[0] = 0; 1018 1019 /* everything specified in number of elements here. */ 1020 jit_tex->width = view->u.buf.size / view_blocksize; 1021 jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.offset; 1022 /* XXX Unsure if we need to sanitize parameters? */ 1023 assert(view->u.buf.offset + view->u.buf.size <= res->width0); 1024 } 1025 } 1026 } 1027 else { 1028 /* display target texture/surface */ 1029 jit_tex->base = llvmpipe_resource_map(res, 0, 0, LP_TEX_USAGE_READ); 1030 jit_tex->row_stride[0] = lp_tex->row_stride[0]; 1031 jit_tex->img_stride[0] = lp_tex->img_stride[0]; 1032 jit_tex->mip_offsets[0] = 0; 1033 jit_tex->width = res->width0; 1034 jit_tex->height = res->height0; 1035 jit_tex->depth = res->depth0; 1036 jit_tex->first_level = jit_tex->last_level = 0; 1037 jit_tex->num_samples = res->nr_samples; 1038 jit_tex->sample_stride = 0; 1039 assert(jit_tex->base); 1040 } 1041 } 1042 else { 1043 pipe_resource_reference(&csctx->cs.current_tex[i], NULL); 1044 } 1045 } 1046 csctx->cs.current_tex_num = num; 1047} 1048 1049 1050/** 1051 * Called during state validation when LP_NEW_SAMPLER is set. 1052 */ 1053static void 1054lp_csctx_set_sampler_state(struct lp_cs_context *csctx, 1055 unsigned num, 1056 struct pipe_sampler_state **samplers) 1057{ 1058 unsigned i; 1059 1060 LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); 1061 1062 assert(num <= PIPE_MAX_SAMPLERS); 1063 1064 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { 1065 const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL; 1066 1067 if (sampler) { 1068 struct lp_jit_sampler *jit_sam; 1069 jit_sam = &csctx->cs.current.jit_context.samplers[i]; 1070 1071 jit_sam->min_lod = sampler->min_lod; 1072 jit_sam->max_lod = sampler->max_lod; 1073 jit_sam->lod_bias = sampler->lod_bias; 1074 jit_sam->max_aniso = sampler->max_anisotropy; 1075 COPY_4V(jit_sam->border_color, sampler->border_color.f); 1076 } 1077 } 1078} 1079 1080static void 1081lp_csctx_set_cs_constants(struct lp_cs_context *csctx, 1082 unsigned num, 1083 struct pipe_constant_buffer *buffers) 1084{ 1085 unsigned i; 1086 1087 LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffers); 1088 1089 assert(num <= ARRAY_SIZE(csctx->constants)); 1090 1091 for (i = 0; i < num; ++i) { 1092 util_copy_constant_buffer(&csctx->constants[i].current, &buffers[i], false); 1093 } 1094 for (; i < ARRAY_SIZE(csctx->constants); i++) { 1095 util_copy_constant_buffer(&csctx->constants[i].current, NULL, false); 1096 } 1097} 1098 1099static void 1100lp_csctx_set_cs_ssbos(struct lp_cs_context *csctx, 1101 unsigned num, 1102 struct pipe_shader_buffer *buffers) 1103{ 1104 int i; 1105 LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *)buffers); 1106 1107 assert (num <= ARRAY_SIZE(csctx->ssbos)); 1108 1109 for (i = 0; i < num; ++i) { 1110 util_copy_shader_buffer(&csctx->ssbos[i].current, &buffers[i]); 1111 } 1112 for (; i < ARRAY_SIZE(csctx->ssbos); i++) { 1113 util_copy_shader_buffer(&csctx->ssbos[i].current, NULL); 1114 } 1115} 1116 1117static void 1118lp_csctx_set_cs_images(struct lp_cs_context *csctx, 1119 unsigned num, 1120 struct pipe_image_view *images) 1121{ 1122 unsigned i; 1123 1124 LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) images); 1125 1126 assert(num <= ARRAY_SIZE(csctx->images)); 1127 1128 for (i = 0; i < num; ++i) { 1129 struct pipe_image_view *image = &images[i]; 1130 util_copy_image_view(&csctx->images[i].current, &images[i]); 1131 1132 struct pipe_resource *res = image->resource; 1133 struct llvmpipe_resource *lp_res = llvmpipe_resource(res); 1134 struct lp_jit_image *jit_image; 1135 1136 jit_image = &csctx->cs.current.jit_context.images[i]; 1137 if (!lp_res) 1138 continue; 1139 if (!lp_res->dt) { 1140 /* regular texture - csctx array of mipmap level offsets */ 1141 if (llvmpipe_resource_is_texture(res)) { 1142 jit_image->base = lp_res->tex_data; 1143 } else 1144 jit_image->base = lp_res->data; 1145 1146 jit_image->width = res->width0; 1147 jit_image->height = res->height0; 1148 jit_image->depth = res->depth0; 1149 jit_image->num_samples = res->nr_samples; 1150 1151 if (llvmpipe_resource_is_texture(res)) { 1152 uint32_t mip_offset = lp_res->mip_offsets[image->u.tex.level]; 1153 const uint32_t bw = util_format_get_blockwidth(image->resource->format); 1154 const uint32_t bh = util_format_get_blockheight(image->resource->format); 1155 1156 jit_image->width = DIV_ROUND_UP(jit_image->width, bw); 1157 jit_image->height = DIV_ROUND_UP(jit_image->height, bh); 1158 jit_image->width = u_minify(jit_image->width, image->u.tex.level); 1159 jit_image->height = u_minify(jit_image->height, image->u.tex.level); 1160 1161 if (res->target == PIPE_TEXTURE_1D_ARRAY || 1162 res->target == PIPE_TEXTURE_2D_ARRAY || 1163 res->target == PIPE_TEXTURE_3D || 1164 res->target == PIPE_TEXTURE_CUBE || 1165 res->target == PIPE_TEXTURE_CUBE_ARRAY) { 1166 /* 1167 * For array textures, we don't have first_layer, instead 1168 * adjust last_layer (stored as depth) plus the mip level offsets 1169 * (as we have mip-first layout can't just adjust base ptr). 1170 * XXX For mip levels, could do something similar. 1171 */ 1172 jit_image->depth = image->u.tex.last_layer - image->u.tex.first_layer + 1; 1173 mip_offset += image->u.tex.first_layer * lp_res->img_stride[image->u.tex.level]; 1174 } else 1175 jit_image->depth = u_minify(jit_image->depth, image->u.tex.level); 1176 1177 jit_image->row_stride = lp_res->row_stride[image->u.tex.level]; 1178 jit_image->img_stride = lp_res->img_stride[image->u.tex.level]; 1179 jit_image->sample_stride = lp_res->sample_stride; 1180 jit_image->base = (uint8_t *)jit_image->base + mip_offset; 1181 } else { 1182 unsigned view_blocksize = util_format_get_blocksize(image->format); 1183 jit_image->width = image->u.buf.size / view_blocksize; 1184 jit_image->base = (uint8_t *)jit_image->base + image->u.buf.offset; 1185 } 1186 } 1187 } 1188 for (; i < ARRAY_SIZE(csctx->images); i++) { 1189 util_copy_image_view(&csctx->images[i].current, NULL); 1190 } 1191} 1192 1193static void 1194update_csctx_consts(struct llvmpipe_context *llvmpipe) 1195{ 1196 struct lp_cs_context *csctx = llvmpipe->csctx; 1197 int i; 1198 1199 for (i = 0; i < ARRAY_SIZE(csctx->constants); ++i) { 1200 struct pipe_resource *buffer = csctx->constants[i].current.buffer; 1201 const ubyte *current_data = NULL; 1202 unsigned current_size = csctx->constants[i].current.buffer_size; 1203 if (buffer) { 1204 /* resource buffer */ 1205 current_data = (ubyte *) llvmpipe_resource_data(buffer); 1206 } 1207 else if (csctx->constants[i].current.user_buffer) { 1208 /* user-space buffer */ 1209 current_data = (ubyte *) csctx->constants[i].current.user_buffer; 1210 } 1211 1212 if (current_data && current_size >= sizeof(float)) { 1213 current_data += csctx->constants[i].current.buffer_offset; 1214 csctx->cs.current.jit_context.constants[i] = (const float *)current_data; 1215 csctx->cs.current.jit_context.num_constants[i] = 1216 DIV_ROUND_UP(csctx->constants[i].current.buffer_size, 1217 lp_get_constant_buffer_stride(llvmpipe->pipe.screen)); 1218 } else { 1219 static const float fake_const_buf[4]; 1220 csctx->cs.current.jit_context.constants[i] = fake_const_buf; 1221 csctx->cs.current.jit_context.num_constants[i] = 0; 1222 } 1223 } 1224} 1225 1226static void 1227update_csctx_ssbo(struct llvmpipe_context *llvmpipe) 1228{ 1229 struct lp_cs_context *csctx = llvmpipe->csctx; 1230 int i; 1231 for (i = 0; i < ARRAY_SIZE(csctx->ssbos); ++i) { 1232 struct pipe_resource *buffer = csctx->ssbos[i].current.buffer; 1233 const ubyte *current_data = NULL; 1234 1235 if (!buffer) 1236 continue; 1237 /* resource buffer */ 1238 current_data = (ubyte *) llvmpipe_resource_data(buffer); 1239 if (current_data) { 1240 current_data += csctx->ssbos[i].current.buffer_offset; 1241 1242 csctx->cs.current.jit_context.ssbos[i] = (const uint32_t *)current_data; 1243 csctx->cs.current.jit_context.num_ssbos[i] = csctx->ssbos[i].current.buffer_size; 1244 } else { 1245 csctx->cs.current.jit_context.ssbos[i] = NULL; 1246 csctx->cs.current.jit_context.num_ssbos[i] = 0; 1247 } 1248 } 1249} 1250 1251static void 1252llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe, void *input) 1253{ 1254 if (llvmpipe->cs_dirty & LP_CSNEW_CONSTANTS) { 1255 lp_csctx_set_cs_constants(llvmpipe->csctx, 1256 ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_COMPUTE]), 1257 llvmpipe->constants[PIPE_SHADER_COMPUTE]); 1258 update_csctx_consts(llvmpipe); 1259 } 1260 1261 if (llvmpipe->cs_dirty & LP_CSNEW_SSBOS) { 1262 lp_csctx_set_cs_ssbos(llvmpipe->csctx, 1263 ARRAY_SIZE(llvmpipe->ssbos[PIPE_SHADER_COMPUTE]), 1264 llvmpipe->ssbos[PIPE_SHADER_COMPUTE]); 1265 update_csctx_ssbo(llvmpipe); 1266 } 1267 1268 if (llvmpipe->cs_dirty & LP_CSNEW_SAMPLER_VIEW) 1269 lp_csctx_set_sampler_views(llvmpipe->csctx, 1270 llvmpipe->num_sampler_views[PIPE_SHADER_COMPUTE], 1271 llvmpipe->sampler_views[PIPE_SHADER_COMPUTE]); 1272 1273 if (llvmpipe->cs_dirty & LP_CSNEW_SAMPLER) 1274 lp_csctx_set_sampler_state(llvmpipe->csctx, 1275 llvmpipe->num_samplers[PIPE_SHADER_COMPUTE], 1276 llvmpipe->samplers[PIPE_SHADER_COMPUTE]); 1277 1278 if (llvmpipe->cs_dirty & LP_CSNEW_IMAGES) 1279 lp_csctx_set_cs_images(llvmpipe->csctx, 1280 ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_COMPUTE]), 1281 llvmpipe->images[PIPE_SHADER_COMPUTE]); 1282 1283 struct lp_cs_context *csctx = llvmpipe->csctx; 1284 csctx->cs.current.jit_context.aniso_filter_table = lp_build_sample_aniso_filter_table(); 1285 if (input) { 1286 csctx->input = input; 1287 csctx->cs.current.jit_context.kernel_args = input; 1288 } 1289 1290 if (llvmpipe->cs_dirty & (LP_CSNEW_CS | 1291 LP_CSNEW_IMAGES | 1292 LP_CSNEW_SAMPLER_VIEW | 1293 LP_CSNEW_SAMPLER)) 1294 llvmpipe_update_cs(llvmpipe); 1295 1296 1297 llvmpipe->cs_dirty = 0; 1298} 1299 1300static void 1301cs_exec_fn(void *init_data, int iter_idx, struct lp_cs_local_mem *lmem) 1302{ 1303 struct lp_cs_job_info *job_info = init_data; 1304 struct lp_jit_cs_thread_data thread_data; 1305 1306 memset(&thread_data, 0, sizeof(thread_data)); 1307 1308 if (lmem->local_size < job_info->req_local_mem) { 1309 lmem->local_mem_ptr = REALLOC(lmem->local_mem_ptr, lmem->local_size, 1310 job_info->req_local_mem); 1311 lmem->local_size = job_info->req_local_mem; 1312 } 1313 thread_data.shared = lmem->local_mem_ptr; 1314 1315 unsigned grid_z = iter_idx / (job_info->grid_size[0] * job_info->grid_size[1]); 1316 unsigned grid_y = (iter_idx - (grid_z * (job_info->grid_size[0] * job_info->grid_size[1]))) / job_info->grid_size[0]; 1317 unsigned grid_x = (iter_idx - (grid_z * (job_info->grid_size[0] * job_info->grid_size[1])) - (grid_y * job_info->grid_size[0])); 1318 1319 grid_z += job_info->grid_base[2]; 1320 grid_y += job_info->grid_base[1]; 1321 grid_x += job_info->grid_base[0]; 1322 struct lp_compute_shader_variant *variant = job_info->current->variant; 1323 variant->jit_function(&job_info->current->jit_context, 1324 job_info->block_size[0], job_info->block_size[1], job_info->block_size[2], 1325 grid_x, grid_y, grid_z, 1326 job_info->grid_size[0], job_info->grid_size[1], job_info->grid_size[2], job_info->work_dim, 1327 &thread_data); 1328} 1329 1330static void 1331fill_grid_size(struct pipe_context *pipe, 1332 const struct pipe_grid_info *info, 1333 uint32_t grid_size[3]) 1334{ 1335 struct pipe_transfer *transfer; 1336 uint32_t *params; 1337 if (!info->indirect) { 1338 grid_size[0] = info->grid[0]; 1339 grid_size[1] = info->grid[1]; 1340 grid_size[2] = info->grid[2]; 1341 return; 1342 } 1343 params = pipe_buffer_map_range(pipe, info->indirect, 1344 info->indirect_offset, 1345 3 * sizeof(uint32_t), 1346 PIPE_MAP_READ, 1347 &transfer); 1348 1349 if (!transfer) 1350 return; 1351 1352 grid_size[0] = params[0]; 1353 grid_size[1] = params[1]; 1354 grid_size[2] = params[2]; 1355 pipe_buffer_unmap(pipe, transfer); 1356} 1357 1358static void llvmpipe_launch_grid(struct pipe_context *pipe, 1359 const struct pipe_grid_info *info) 1360{ 1361 struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); 1362 struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); 1363 struct lp_cs_job_info job_info; 1364 1365 if (!llvmpipe_check_render_cond(llvmpipe)) 1366 return; 1367 1368 memset(&job_info, 0, sizeof(job_info)); 1369 1370 llvmpipe_cs_update_derived(llvmpipe, info->input); 1371 1372 fill_grid_size(pipe, info, job_info.grid_size); 1373 1374 job_info.grid_base[0] = info->grid_base[0]; 1375 job_info.grid_base[1] = info->grid_base[1]; 1376 job_info.grid_base[2] = info->grid_base[2]; 1377 job_info.block_size[0] = info->block[0]; 1378 job_info.block_size[1] = info->block[1]; 1379 job_info.block_size[2] = info->block[2]; 1380 job_info.work_dim = info->work_dim; 1381 job_info.req_local_mem = llvmpipe->cs->req_local_mem; 1382 job_info.current = &llvmpipe->csctx->cs.current; 1383 1384 int num_tasks = job_info.grid_size[2] * job_info.grid_size[1] * job_info.grid_size[0]; 1385 if (num_tasks) { 1386 struct lp_cs_tpool_task *task; 1387 mtx_lock(&screen->cs_mutex); 1388 task = lp_cs_tpool_queue_task(screen->cs_tpool, cs_exec_fn, &job_info, num_tasks); 1389 mtx_unlock(&screen->cs_mutex); 1390 1391 lp_cs_tpool_wait_for_task(screen->cs_tpool, &task); 1392 } 1393 llvmpipe->pipeline_statistics.cs_invocations += num_tasks * info->block[0] * info->block[1] * info->block[2]; 1394} 1395 1396static void 1397llvmpipe_set_compute_resources(struct pipe_context *pipe, 1398 unsigned start, unsigned count, 1399 struct pipe_surface **resources) 1400{ 1401 1402 1403} 1404 1405static void 1406llvmpipe_set_global_binding(struct pipe_context *pipe, 1407 unsigned first, unsigned count, 1408 struct pipe_resource **resources, 1409 uint32_t **handles) 1410{ 1411 struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); 1412 struct lp_compute_shader *cs = llvmpipe->cs; 1413 unsigned i; 1414 1415 if (first + count > cs->max_global_buffers) { 1416 unsigned old_max = cs->max_global_buffers; 1417 cs->max_global_buffers = first + count; 1418 cs->global_buffers = realloc(cs->global_buffers, 1419 cs->max_global_buffers * sizeof(cs->global_buffers[0])); 1420 if (!cs->global_buffers) { 1421 return; 1422 } 1423 1424 memset(&cs->global_buffers[old_max], 0, (cs->max_global_buffers - old_max) * sizeof(cs->global_buffers[0])); 1425 } 1426 1427 if (!resources) { 1428 for (i = 0; i < count; i++) 1429 pipe_resource_reference(&cs->global_buffers[first + i], NULL); 1430 return; 1431 } 1432 1433 for (i = 0; i < count; i++) { 1434 uintptr_t va; 1435 uint32_t offset; 1436 pipe_resource_reference(&cs->global_buffers[first + i], resources[i]); 1437 struct llvmpipe_resource *lp_res = llvmpipe_resource(resources[i]); 1438 offset = *handles[i]; 1439 va = (uintptr_t)((char *)lp_res->data + offset); 1440 memcpy(handles[i], &va, sizeof(va)); 1441 } 1442} 1443 1444void 1445llvmpipe_init_compute_funcs(struct llvmpipe_context *llvmpipe) 1446{ 1447 llvmpipe->pipe.create_compute_state = llvmpipe_create_compute_state; 1448 llvmpipe->pipe.bind_compute_state = llvmpipe_bind_compute_state; 1449 llvmpipe->pipe.delete_compute_state = llvmpipe_delete_compute_state; 1450 llvmpipe->pipe.set_compute_resources = llvmpipe_set_compute_resources; 1451 llvmpipe->pipe.set_global_binding = llvmpipe_set_global_binding; 1452 llvmpipe->pipe.launch_grid = llvmpipe_launch_grid; 1453} 1454 1455void 1456lp_csctx_destroy(struct lp_cs_context *csctx) 1457{ 1458 unsigned i; 1459 for (i = 0; i < ARRAY_SIZE(csctx->cs.current_tex); i++) { 1460 struct pipe_resource **res_ptr = &csctx->cs.current_tex[i]; 1461 if (*res_ptr) 1462 llvmpipe_resource_unmap(*res_ptr, 0, 0); 1463 pipe_resource_reference(res_ptr, NULL); 1464 } 1465 for (i = 0; i < ARRAY_SIZE(csctx->constants); i++) { 1466 pipe_resource_reference(&csctx->constants[i].current.buffer, NULL); 1467 } 1468 for (i = 0; i < ARRAY_SIZE(csctx->ssbos); i++) { 1469 pipe_resource_reference(&csctx->ssbos[i].current.buffer, NULL); 1470 } 1471 for (i = 0; i < ARRAY_SIZE(csctx->images); i++) { 1472 pipe_resource_reference(&csctx->images[i].current.resource, NULL); 1473 } 1474 FREE(csctx); 1475} 1476 1477struct lp_cs_context *lp_csctx_create(struct pipe_context *pipe) 1478{ 1479 struct lp_cs_context *csctx; 1480 1481 csctx = CALLOC_STRUCT(lp_cs_context); 1482 if (!csctx) 1483 return NULL; 1484 1485 csctx->pipe = pipe; 1486 return csctx; 1487} 1488