101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2012 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include <errno.h> 2501e04c3fSmrg 2601e04c3fSmrg#include "program/prog_instruction.h" 2701e04c3fSmrg 2801e04c3fSmrg#include "blorp_priv.h" 2901e04c3fSmrg#include "compiler/brw_compiler.h" 3001e04c3fSmrg#include "compiler/brw_nir.h" 317ec681f3Smrg#include "dev/intel_debug.h" 327ec681f3Smrg 337ec681f3Smrgconst char * 347ec681f3Smrgblorp_shader_type_to_name(enum blorp_shader_type type) 357ec681f3Smrg{ 367ec681f3Smrg static const char *shader_name[] = { 377ec681f3Smrg [BLORP_SHADER_TYPE_COPY] = "BLORP-copy", 387ec681f3Smrg [BLORP_SHADER_TYPE_BLIT] = "BLORP-blit", 397ec681f3Smrg [BLORP_SHADER_TYPE_CLEAR] = "BLORP-clear", 407ec681f3Smrg [BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE] = "BLORP-mcs-partial-resolve", 417ec681f3Smrg [BLORP_SHADER_TYPE_LAYER_OFFSET_VS] = "BLORP-layer-offset-vs", 427ec681f3Smrg [BLORP_SHADER_TYPE_GFX4_SF] = "BLORP-gfx4-sf", 437ec681f3Smrg }; 447ec681f3Smrg assert(type < ARRAY_SIZE(shader_name)); 457ec681f3Smrg 467ec681f3Smrg return shader_name[type]; 477ec681f3Smrg} 4801e04c3fSmrg 4901e04c3fSmrgvoid 5001e04c3fSmrgblorp_init(struct blorp_context *blorp, void *driver_ctx, 5101e04c3fSmrg struct isl_device *isl_dev) 5201e04c3fSmrg{ 5301e04c3fSmrg blorp->driver_ctx = driver_ctx; 5401e04c3fSmrg blorp->isl_dev = isl_dev; 5501e04c3fSmrg} 5601e04c3fSmrg 5701e04c3fSmrgvoid 5801e04c3fSmrgblorp_finish(struct blorp_context *blorp) 5901e04c3fSmrg{ 6001e04c3fSmrg blorp->driver_ctx = NULL; 6101e04c3fSmrg} 6201e04c3fSmrg 6301e04c3fSmrgvoid 6401e04c3fSmrgblorp_batch_init(struct blorp_context *blorp, 6501e04c3fSmrg struct blorp_batch *batch, void *driver_batch, 6601e04c3fSmrg enum blorp_batch_flags flags) 6701e04c3fSmrg{ 6801e04c3fSmrg batch->blorp = blorp; 6901e04c3fSmrg batch->driver_batch = driver_batch; 7001e04c3fSmrg batch->flags = flags; 7101e04c3fSmrg} 7201e04c3fSmrg 7301e04c3fSmrgvoid 7401e04c3fSmrgblorp_batch_finish(struct blorp_batch *batch) 7501e04c3fSmrg{ 7601e04c3fSmrg batch->blorp = NULL; 7701e04c3fSmrg} 7801e04c3fSmrg 7901e04c3fSmrgvoid 807ec681f3Smrgbrw_blorp_surface_info_init(struct blorp_batch *batch, 8101e04c3fSmrg struct brw_blorp_surface_info *info, 8201e04c3fSmrg const struct blorp_surf *surf, 837ec681f3Smrg unsigned int level, float layer, 847ec681f3Smrg enum isl_format format, bool is_dest) 8501e04c3fSmrg{ 867ec681f3Smrg struct blorp_context *blorp = batch->blorp; 877ec681f3Smrg memset(info, 0, sizeof(*info)); 8801e04c3fSmrg assert(level < surf->surf->levels); 8901e04c3fSmrg assert(layer < MAX2(surf->surf->logical_level0_px.depth >> level, 9001e04c3fSmrg surf->surf->logical_level0_px.array_len)); 9101e04c3fSmrg 9201e04c3fSmrg info->enabled = true; 9301e04c3fSmrg 9401e04c3fSmrg if (format == ISL_FORMAT_UNSUPPORTED) 9501e04c3fSmrg format = surf->surf->format; 9601e04c3fSmrg 9701e04c3fSmrg info->surf = *surf->surf; 9801e04c3fSmrg info->addr = surf->addr; 9901e04c3fSmrg 10001e04c3fSmrg info->aux_usage = surf->aux_usage; 10101e04c3fSmrg if (info->aux_usage != ISL_AUX_USAGE_NONE) { 10201e04c3fSmrg info->aux_surf = *surf->aux_surf; 10301e04c3fSmrg info->aux_addr = surf->aux_addr; 10401e04c3fSmrg } 10501e04c3fSmrg 10601e04c3fSmrg info->clear_color = surf->clear_color; 10701e04c3fSmrg info->clear_color_addr = surf->clear_color_addr; 10801e04c3fSmrg 1097ec681f3Smrg isl_surf_usage_flags_t view_usage; 1107ec681f3Smrg if (is_dest) { 1117ec681f3Smrg if (batch->flags & BLORP_BATCH_USE_COMPUTE) 1127ec681f3Smrg view_usage = ISL_SURF_USAGE_STORAGE_BIT; 1137ec681f3Smrg else 1147ec681f3Smrg view_usage = ISL_SURF_USAGE_RENDER_TARGET_BIT; 1157ec681f3Smrg } else { 1167ec681f3Smrg view_usage = ISL_SURF_USAGE_TEXTURE_BIT; 1177ec681f3Smrg } 1187ec681f3Smrg 11901e04c3fSmrg info->view = (struct isl_view) { 1207ec681f3Smrg .usage = view_usage, 12101e04c3fSmrg .format = format, 12201e04c3fSmrg .base_level = level, 12301e04c3fSmrg .levels = 1, 12401e04c3fSmrg .swizzle = ISL_SWIZZLE_IDENTITY, 12501e04c3fSmrg }; 12601e04c3fSmrg 12701e04c3fSmrg info->view.array_len = MAX2(info->surf.logical_level0_px.depth, 12801e04c3fSmrg info->surf.logical_level0_px.array_len); 12901e04c3fSmrg 1307ec681f3Smrg if (!is_dest && 13101e04c3fSmrg (info->surf.dim == ISL_SURF_DIM_3D || 13201e04c3fSmrg info->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY)) { 13301e04c3fSmrg /* 3-D textures don't support base_array layer and neither do 2-D 13401e04c3fSmrg * multisampled textures on IVB so we need to pass it through the 13501e04c3fSmrg * sampler in those cases. These are also two cases where we are 13601e04c3fSmrg * guaranteed that we won't be doing any funny surface hacks. 13701e04c3fSmrg */ 13801e04c3fSmrg info->view.base_array_layer = 0; 13901e04c3fSmrg info->z_offset = layer; 14001e04c3fSmrg } else { 14101e04c3fSmrg info->view.base_array_layer = layer; 14201e04c3fSmrg 14301e04c3fSmrg assert(info->view.array_len >= info->view.base_array_layer); 14401e04c3fSmrg info->view.array_len -= info->view.base_array_layer; 14501e04c3fSmrg info->z_offset = 0; 14601e04c3fSmrg } 14701e04c3fSmrg 14801e04c3fSmrg /* Sandy Bridge and earlier have a limit of a maximum of 512 layers for 14901e04c3fSmrg * layered rendering. 15001e04c3fSmrg */ 1517ec681f3Smrg if (is_dest && blorp->isl_dev->info->ver <= 6) 15201e04c3fSmrg info->view.array_len = MIN2(info->view.array_len, 512); 15301e04c3fSmrg 15401e04c3fSmrg if (surf->tile_x_sa || surf->tile_y_sa) { 15501e04c3fSmrg /* This is only allowed on simple 2D surfaces without MSAA */ 15601e04c3fSmrg assert(info->surf.dim == ISL_SURF_DIM_2D); 15701e04c3fSmrg assert(info->surf.samples == 1); 15801e04c3fSmrg assert(info->surf.levels == 1); 15901e04c3fSmrg assert(info->surf.logical_level0_px.array_len == 1); 16001e04c3fSmrg assert(info->aux_usage == ISL_AUX_USAGE_NONE); 16101e04c3fSmrg 16201e04c3fSmrg info->tile_x_sa = surf->tile_x_sa; 16301e04c3fSmrg info->tile_y_sa = surf->tile_y_sa; 16401e04c3fSmrg 16501e04c3fSmrg /* Instead of using the X/Y Offset fields in RENDER_SURFACE_STATE, we 16601e04c3fSmrg * place the image at the tile boundary and offset our sampling or 16701e04c3fSmrg * rendering. For this reason, we need to grow the image by the offset 16801e04c3fSmrg * to ensure that the hardware doesn't think we've gone past the edge. 16901e04c3fSmrg */ 17001e04c3fSmrg info->surf.logical_level0_px.w += surf->tile_x_sa; 17101e04c3fSmrg info->surf.logical_level0_px.h += surf->tile_y_sa; 17201e04c3fSmrg info->surf.phys_level0_sa.w += surf->tile_x_sa; 17301e04c3fSmrg info->surf.phys_level0_sa.h += surf->tile_y_sa; 17401e04c3fSmrg } 17501e04c3fSmrg} 17601e04c3fSmrg 17701e04c3fSmrg 17801e04c3fSmrgvoid 17901e04c3fSmrgblorp_params_init(struct blorp_params *params) 18001e04c3fSmrg{ 18101e04c3fSmrg memset(params, 0, sizeof(*params)); 18201e04c3fSmrg params->num_samples = 1; 18301e04c3fSmrg params->num_draw_buffers = 1; 18401e04c3fSmrg params->num_layers = 1; 18501e04c3fSmrg} 18601e04c3fSmrg 1877ec681f3Smrgstatic void 1887ec681f3Smrgblorp_init_base_prog_key(struct brw_base_prog_key *key) 1897ec681f3Smrg{ 1907ec681f3Smrg for (int i = 0; i < MAX_SAMPLERS; i++) 1917ec681f3Smrg key->tex.swizzles[i] = SWIZZLE_XYZW; 1927ec681f3Smrg} 1937ec681f3Smrg 19401e04c3fSmrgvoid 19501e04c3fSmrgbrw_blorp_init_wm_prog_key(struct brw_wm_prog_key *wm_key) 19601e04c3fSmrg{ 19701e04c3fSmrg memset(wm_key, 0, sizeof(*wm_key)); 19801e04c3fSmrg wm_key->nr_color_regions = 1; 1997ec681f3Smrg blorp_init_base_prog_key(&wm_key->base); 2007ec681f3Smrg} 2017ec681f3Smrg 2027ec681f3Smrgvoid 2037ec681f3Smrgbrw_blorp_init_cs_prog_key(struct brw_cs_prog_key *cs_key) 2047ec681f3Smrg{ 2057ec681f3Smrg memset(cs_key, 0, sizeof(*cs_key)); 2067ec681f3Smrg blorp_init_base_prog_key(&cs_key->base); 20701e04c3fSmrg} 20801e04c3fSmrg 20901e04c3fSmrgconst unsigned * 21001e04c3fSmrgblorp_compile_fs(struct blorp_context *blorp, void *mem_ctx, 21101e04c3fSmrg struct nir_shader *nir, 21201e04c3fSmrg struct brw_wm_prog_key *wm_key, 21301e04c3fSmrg bool use_repclear, 21401e04c3fSmrg struct brw_wm_prog_data *wm_prog_data) 21501e04c3fSmrg{ 21601e04c3fSmrg const struct brw_compiler *compiler = blorp->compiler; 21701e04c3fSmrg 21801e04c3fSmrg nir->options = 21901e04c3fSmrg compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions; 22001e04c3fSmrg 22101e04c3fSmrg memset(wm_prog_data, 0, sizeof(*wm_prog_data)); 22201e04c3fSmrg 22301e04c3fSmrg wm_prog_data->base.nr_params = 0; 22401e04c3fSmrg wm_prog_data->base.param = NULL; 22501e04c3fSmrg 22601e04c3fSmrg /* BLORP always uses the first two binding table entries: 22701e04c3fSmrg * - Surface 0 is the render target (which always start from 0) 22801e04c3fSmrg * - Surface 1 is the source texture 22901e04c3fSmrg */ 23001e04c3fSmrg wm_prog_data->base.binding_table.texture_start = BLORP_TEXTURE_BT_INDEX; 23101e04c3fSmrg 2327ec681f3Smrg brw_preprocess_nir(compiler, nir, NULL); 2337ec681f3Smrg nir_remove_dead_variables(nir, nir_var_shader_in, NULL); 23401e04c3fSmrg nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 23501e04c3fSmrg 2367ec681f3Smrg if (blorp->compiler->devinfo->ver < 6) { 23701e04c3fSmrg if (nir->info.fs.uses_discard) 23801e04c3fSmrg wm_key->iz_lookup |= BRW_WM_IZ_PS_KILL_ALPHATEST_BIT; 23901e04c3fSmrg 24001e04c3fSmrg wm_key->input_slots_valid = nir->info.inputs_read | VARYING_BIT_POS; 24101e04c3fSmrg } 24201e04c3fSmrg 2437ec681f3Smrg struct brw_compile_fs_params params = { 2447ec681f3Smrg .nir = nir, 2457ec681f3Smrg .key = wm_key, 2467ec681f3Smrg .prog_data = wm_prog_data, 24701e04c3fSmrg 2487ec681f3Smrg .use_rep_send = use_repclear, 2497ec681f3Smrg .log_data = blorp->driver_ctx, 2507ec681f3Smrg 2517ec681f3Smrg .debug_flag = DEBUG_BLORP, 2527ec681f3Smrg }; 2537ec681f3Smrg 2547ec681f3Smrg return brw_compile_fs(compiler, mem_ctx, ¶ms); 25501e04c3fSmrg} 25601e04c3fSmrg 25701e04c3fSmrgconst unsigned * 25801e04c3fSmrgblorp_compile_vs(struct blorp_context *blorp, void *mem_ctx, 25901e04c3fSmrg struct nir_shader *nir, 26001e04c3fSmrg struct brw_vs_prog_data *vs_prog_data) 26101e04c3fSmrg{ 26201e04c3fSmrg const struct brw_compiler *compiler = blorp->compiler; 26301e04c3fSmrg 26401e04c3fSmrg nir->options = 26501e04c3fSmrg compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions; 26601e04c3fSmrg 2677ec681f3Smrg brw_preprocess_nir(compiler, nir, NULL); 26801e04c3fSmrg nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 26901e04c3fSmrg 27001e04c3fSmrg vs_prog_data->inputs_read = nir->info.inputs_read; 27101e04c3fSmrg 27201e04c3fSmrg brw_compute_vue_map(compiler->devinfo, 27301e04c3fSmrg &vs_prog_data->base.vue_map, 27401e04c3fSmrg nir->info.outputs_written, 2757ec681f3Smrg nir->info.separate_shader, 2767ec681f3Smrg 1); 27701e04c3fSmrg 27801e04c3fSmrg struct brw_vs_prog_key vs_key = { 0, }; 27901e04c3fSmrg 2807ec681f3Smrg struct brw_compile_vs_params params = { 2817ec681f3Smrg .nir = nir, 2827ec681f3Smrg .key = &vs_key, 2837ec681f3Smrg .prog_data = vs_prog_data, 2847ec681f3Smrg .log_data = blorp->driver_ctx, 2857ec681f3Smrg 2867ec681f3Smrg .debug_flag = DEBUG_BLORP, 2877ec681f3Smrg }; 2887ec681f3Smrg 2897ec681f3Smrg return brw_compile_vs(compiler, mem_ctx, ¶ms); 2907ec681f3Smrg} 2917ec681f3Smrg 2927ec681f3Smrgconst unsigned * 2937ec681f3Smrgblorp_compile_cs(struct blorp_context *blorp, void *mem_ctx, 2947ec681f3Smrg struct nir_shader *nir, 2957ec681f3Smrg struct brw_cs_prog_key *cs_key, 2967ec681f3Smrg struct brw_cs_prog_data *cs_prog_data) 2977ec681f3Smrg{ 2987ec681f3Smrg const struct brw_compiler *compiler = blorp->compiler; 2997ec681f3Smrg 3007ec681f3Smrg nir->options = 3017ec681f3Smrg compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions; 3027ec681f3Smrg 3037ec681f3Smrg memset(cs_prog_data, 0, sizeof(*cs_prog_data)); 3047ec681f3Smrg 3057ec681f3Smrg /* BLORP always uses the first two binding table entries: 3067ec681f3Smrg * - Surface 0 is the destination image (which always start from 0) 3077ec681f3Smrg * - Surface 1 is the source texture 3087ec681f3Smrg */ 3097ec681f3Smrg cs_prog_data->base.binding_table.texture_start = BLORP_TEXTURE_BT_INDEX; 3107ec681f3Smrg 3117ec681f3Smrg brw_preprocess_nir(compiler, nir, NULL); 3127ec681f3Smrg nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 3137ec681f3Smrg 3147ec681f3Smrg NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, type_size_scalar_bytes, 3157ec681f3Smrg (nir_lower_io_options)0); 3167ec681f3Smrg 3177ec681f3Smrg STATIC_ASSERT(offsetof(struct brw_blorp_wm_inputs, subgroup_id) + 4 == 3187ec681f3Smrg sizeof(struct brw_blorp_wm_inputs)); 3197ec681f3Smrg nir->num_uniforms = offsetof(struct brw_blorp_wm_inputs, subgroup_id); 3207ec681f3Smrg unsigned nr_params = nir->num_uniforms / 4; 3217ec681f3Smrg cs_prog_data->base.nr_params = nr_params; 3227ec681f3Smrg cs_prog_data->base.param = rzalloc_array(NULL, uint32_t, nr_params); 3237ec681f3Smrg 3247ec681f3Smrg NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics); 3257ec681f3Smrg 3267ec681f3Smrg struct brw_compile_cs_params params = { 3277ec681f3Smrg .nir = nir, 3287ec681f3Smrg .key = cs_key, 3297ec681f3Smrg .prog_data = cs_prog_data, 3307ec681f3Smrg .log_data = blorp->driver_ctx, 3317ec681f3Smrg .debug_flag = DEBUG_BLORP, 3327ec681f3Smrg }; 3337ec681f3Smrg 3347ec681f3Smrg const unsigned *program = brw_compile_cs(compiler, mem_ctx, ¶ms); 3357ec681f3Smrg 3367ec681f3Smrg ralloc_free(cs_prog_data->base.param); 3377ec681f3Smrg cs_prog_data->base.param = NULL; 33801e04c3fSmrg 33901e04c3fSmrg return program; 34001e04c3fSmrg} 34101e04c3fSmrg 34201e04c3fSmrgstruct blorp_sf_key { 3437ec681f3Smrg struct brw_blorp_base_key base; 34401e04c3fSmrg struct brw_sf_prog_key key; 34501e04c3fSmrg}; 34601e04c3fSmrg 34701e04c3fSmrgbool 3489f464c52Smayablorp_ensure_sf_program(struct blorp_batch *batch, 34901e04c3fSmrg struct blorp_params *params) 35001e04c3fSmrg{ 3519f464c52Smaya struct blorp_context *blorp = batch->blorp; 35201e04c3fSmrg const struct brw_wm_prog_data *wm_prog_data = params->wm_prog_data; 35301e04c3fSmrg assert(params->wm_prog_data); 35401e04c3fSmrg 3557ec681f3Smrg /* Gfx6+ doesn't need a strips and fans program */ 3567ec681f3Smrg if (blorp->compiler->devinfo->ver >= 6) 35701e04c3fSmrg return true; 35801e04c3fSmrg 35901e04c3fSmrg struct blorp_sf_key key = { 3607ec681f3Smrg .base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_GFX4_SF), 36101e04c3fSmrg }; 36201e04c3fSmrg 36301e04c3fSmrg /* Everything gets compacted in vertex setup, so we just need a 36401e04c3fSmrg * pass-through for the correct number of input varyings. 36501e04c3fSmrg */ 36601e04c3fSmrg const uint64_t slots_valid = VARYING_BIT_POS | 36701e04c3fSmrg ((1ull << wm_prog_data->num_varying_inputs) - 1) << VARYING_SLOT_VAR0; 36801e04c3fSmrg 36901e04c3fSmrg key.key.attrs = slots_valid; 37001e04c3fSmrg key.key.primitive = BRW_SF_PRIM_TRIANGLES; 37101e04c3fSmrg key.key.contains_flat_varying = wm_prog_data->contains_flat_varying; 37201e04c3fSmrg 37301e04c3fSmrg STATIC_ASSERT(sizeof(key.key.interp_mode) == 37401e04c3fSmrg sizeof(wm_prog_data->interp_mode)); 37501e04c3fSmrg memcpy(key.key.interp_mode, wm_prog_data->interp_mode, 37601e04c3fSmrg sizeof(key.key.interp_mode)); 37701e04c3fSmrg 3789f464c52Smaya if (blorp->lookup_shader(batch, &key, sizeof(key), 37901e04c3fSmrg ¶ms->sf_prog_kernel, ¶ms->sf_prog_data)) 38001e04c3fSmrg return true; 38101e04c3fSmrg 38201e04c3fSmrg void *mem_ctx = ralloc_context(NULL); 38301e04c3fSmrg 38401e04c3fSmrg const unsigned *program; 38501e04c3fSmrg unsigned program_size; 38601e04c3fSmrg 38701e04c3fSmrg struct brw_vue_map vue_map; 3887ec681f3Smrg brw_compute_vue_map(blorp->compiler->devinfo, &vue_map, slots_valid, false, 1); 38901e04c3fSmrg 39001e04c3fSmrg struct brw_sf_prog_data prog_data_tmp; 39101e04c3fSmrg program = brw_compile_sf(blorp->compiler, mem_ctx, &key.key, 39201e04c3fSmrg &prog_data_tmp, &vue_map, &program_size); 39301e04c3fSmrg 39401e04c3fSmrg bool result = 3957ec681f3Smrg blorp->upload_shader(batch, MESA_SHADER_NONE, 3967ec681f3Smrg &key, sizeof(key), program, program_size, 39701e04c3fSmrg (void *)&prog_data_tmp, sizeof(prog_data_tmp), 39801e04c3fSmrg ¶ms->sf_prog_kernel, ¶ms->sf_prog_data); 39901e04c3fSmrg 40001e04c3fSmrg ralloc_free(mem_ctx); 40101e04c3fSmrg 40201e04c3fSmrg return result; 40301e04c3fSmrg} 40401e04c3fSmrg 40501e04c3fSmrgvoid 40601e04c3fSmrgblorp_hiz_op(struct blorp_batch *batch, struct blorp_surf *surf, 40701e04c3fSmrg uint32_t level, uint32_t start_layer, uint32_t num_layers, 40801e04c3fSmrg enum isl_aux_op op) 40901e04c3fSmrg{ 4107ec681f3Smrg const struct intel_device_info *devinfo = batch->blorp->isl_dev->info; 4117ec681f3Smrg 41201e04c3fSmrg struct blorp_params params; 41301e04c3fSmrg blorp_params_init(¶ms); 41401e04c3fSmrg 41501e04c3fSmrg params.hiz_op = op; 41601e04c3fSmrg params.full_surface_hiz_op = true; 4177ec681f3Smrg switch (op) { 4187ec681f3Smrg case ISL_AUX_OP_FULL_RESOLVE: 4197ec681f3Smrg params.snapshot_type = INTEL_SNAPSHOT_HIZ_RESOLVE; 4207ec681f3Smrg break; 4217ec681f3Smrg case ISL_AUX_OP_AMBIGUATE: 4227ec681f3Smrg params.snapshot_type = INTEL_SNAPSHOT_HIZ_AMBIGUATE; 4237ec681f3Smrg break; 4247ec681f3Smrg case ISL_AUX_OP_FAST_CLEAR: 4257ec681f3Smrg params.snapshot_type = INTEL_SNAPSHOT_HIZ_CLEAR; 4267ec681f3Smrg break; 4277ec681f3Smrg case ISL_AUX_OP_PARTIAL_RESOLVE: 4287ec681f3Smrg case ISL_AUX_OP_NONE: 4297ec681f3Smrg unreachable("Invalid HiZ op"); 4307ec681f3Smrg } 43101e04c3fSmrg 43201e04c3fSmrg for (uint32_t a = 0; a < num_layers; a++) { 43301e04c3fSmrg const uint32_t layer = start_layer + a; 43401e04c3fSmrg 4357ec681f3Smrg brw_blorp_surface_info_init(batch, ¶ms.depth, surf, level, 43601e04c3fSmrg layer, surf->surf->format, true); 43701e04c3fSmrg 43801e04c3fSmrg /* Align the rectangle primitive to 8x4 pixels. 43901e04c3fSmrg * 44001e04c3fSmrg * During fast depth clears, the emitted rectangle primitive must be 44101e04c3fSmrg * aligned to 8x4 pixels. From the Ivybridge PRM, Vol 2 Part 1 Section 44201e04c3fSmrg * 11.5.3.1 Depth Buffer Clear (and the matching section in the 44301e04c3fSmrg * Sandybridge PRM): 44401e04c3fSmrg * 44501e04c3fSmrg * If Number of Multisamples is NUMSAMPLES_1, the rectangle must be 44601e04c3fSmrg * aligned to an 8x4 pixel block relative to the upper left corner 44701e04c3fSmrg * of the depth buffer [...] 44801e04c3fSmrg * 44901e04c3fSmrg * For hiz resolves, the rectangle must also be 8x4 aligned. Item 45001e04c3fSmrg * WaHizAmbiguate8x4Aligned from the Haswell workarounds page and the 45101e04c3fSmrg * Ivybridge simulator require the alignment. 45201e04c3fSmrg * 45301e04c3fSmrg * To be safe, let's just align the rect for all hiz operations and all 45401e04c3fSmrg * hardware generations. 45501e04c3fSmrg * 45601e04c3fSmrg * However, for some miptree slices of a Z24 texture, emitting an 8x4 45701e04c3fSmrg * aligned rectangle that covers the slice may clobber adjacent slices 45801e04c3fSmrg * if we strictly adhered to the texture alignments specified in the 45901e04c3fSmrg * PRM. The Ivybridge PRM, Section "Alignment Unit Size", states that 46001e04c3fSmrg * SURFACE_STATE.Surface_Horizontal_Alignment should be 4 for Z24 46101e04c3fSmrg * surfaces, not 8. But commit 1f112cc increased the alignment from 4 to 46201e04c3fSmrg * 8, which prevents the clobbering. 46301e04c3fSmrg */ 46401e04c3fSmrg params.x1 = minify(params.depth.surf.logical_level0_px.width, 46501e04c3fSmrg params.depth.view.base_level); 46601e04c3fSmrg params.y1 = minify(params.depth.surf.logical_level0_px.height, 46701e04c3fSmrg params.depth.view.base_level); 46801e04c3fSmrg params.x1 = ALIGN(params.x1, 8); 46901e04c3fSmrg params.y1 = ALIGN(params.y1, 4); 47001e04c3fSmrg 47101e04c3fSmrg if (params.depth.view.base_level == 0) { 47201e04c3fSmrg /* TODO: What about MSAA? */ 47301e04c3fSmrg params.depth.surf.logical_level0_px.width = params.x1; 47401e04c3fSmrg params.depth.surf.logical_level0_px.height = params.y1; 4757ec681f3Smrg } else if (devinfo->ver >= 8 && devinfo->ver <= 9 && 4767ec681f3Smrg op == ISL_AUX_OP_AMBIGUATE) { 4777ec681f3Smrg /* On some platforms, it's not enough to just adjust the clear 4787ec681f3Smrg * rectangle when the LOD is greater than 0. 4797ec681f3Smrg * 4807ec681f3Smrg * From the BDW and SKL PRMs, Vol 7, "Optimized Hierarchical Depth 4817ec681f3Smrg * Buffer Resolve": 4827ec681f3Smrg * 4837ec681f3Smrg * The following is required when performing a hierarchical depth 4847ec681f3Smrg * buffer resolve: 4857ec681f3Smrg * 4867ec681f3Smrg * - A rectangle primitive covering the full render target must be 4877ec681f3Smrg * programmed on Xmin, Ymin, Xmax, and Ymax in the 4887ec681f3Smrg * 3DSTATE_WM_HZ_OP command. 4897ec681f3Smrg * 4907ec681f3Smrg * - The rectangle primitive size must be aligned to 8x4 pixels. 4917ec681f3Smrg * 4927ec681f3Smrg * And from the Clear Rectangle programming note in 3DSTATE_WM_HZ_OP 4937ec681f3Smrg * (Vol 2a): 4947ec681f3Smrg * 4957ec681f3Smrg * Hence the max values must be less than or equal to: ( Surface 4967ec681f3Smrg * Width » LOD ) and ( Surface Height » LOD ) for X Max and Y Max 4977ec681f3Smrg * respectively. 4987ec681f3Smrg * 4997ec681f3Smrg * This means that the extent of the LOD must be naturally 5007ec681f3Smrg * 8x4-aligned after minification of the base LOD. Since the base LOD 5017ec681f3Smrg * dimensions affect the placement of smaller LODs, it's not trivial 5027ec681f3Smrg * (nor possible, at times) to satisfy the requirement by adjusting 5037ec681f3Smrg * the base LOD extent. Just assert that the caller is accessing an 5047ec681f3Smrg * LOD that satisfies this requirement. 5057ec681f3Smrg */ 5067ec681f3Smrg assert(minify(params.depth.surf.logical_level0_px.width, 5077ec681f3Smrg params.depth.view.base_level) == params.x1); 5087ec681f3Smrg assert(minify(params.depth.surf.logical_level0_px.height, 5097ec681f3Smrg params.depth.view.base_level) == params.y1); 51001e04c3fSmrg } 51101e04c3fSmrg 51201e04c3fSmrg params.dst.surf.samples = params.depth.surf.samples; 51301e04c3fSmrg params.dst.surf.logical_level0_px = params.depth.surf.logical_level0_px; 51401e04c3fSmrg params.depth_format = 51501e04c3fSmrg isl_format_get_depth_format(surf->surf->format, false); 51601e04c3fSmrg params.num_samples = params.depth.surf.samples; 51701e04c3fSmrg 51801e04c3fSmrg batch->blorp->exec(batch, ¶ms); 51901e04c3fSmrg } 52001e04c3fSmrg} 521