/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22 */ 23 24#include "util/ralloc.h" 25 26#include "main/macros.h" /* Needed for MAX3 and MAX2 for format_rgb9e5 */ 27#include "util/format_rgb9e5.h" 28#include "util/format_srgb.h" 29 30#include "blorp_priv.h" 31#include "compiler/brw_eu_defines.h" 32 33#include "blorp_nir_builder.h" 34 35#define FILE_DEBUG_FLAG DEBUG_BLORP 36 37struct brw_blorp_const_color_prog_key 38{ 39 enum blorp_shader_type shader_type; /* Must be BLORP_SHADER_TYPE_CLEAR */ 40 bool use_simd16_replicated_data; 41 bool clear_rgb_as_red; 42 bool pad[3]; 43}; 44 45static bool 46blorp_params_get_clear_kernel(struct blorp_batch *batch, 47 struct blorp_params *params, 48 bool use_replicated_data, 49 bool clear_rgb_as_red) 50{ 51 struct blorp_context *blorp = batch->blorp; 52 53 const struct brw_blorp_const_color_prog_key blorp_key = { 54 .shader_type = BLORP_SHADER_TYPE_CLEAR, 55 .use_simd16_replicated_data = use_replicated_data, 56 .clear_rgb_as_red = clear_rgb_as_red, 57 }; 58 59 if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), 60 ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) 61 return true; 62 63 void *mem_ctx = ralloc_context(NULL); 64 65 nir_builder b; 66 blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, "BLORP-clear"); 67 68 nir_variable *v_color = 69 BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type()); 70 nir_ssa_def *color = nir_load_var(&b, v_color); 71 72 if (clear_rgb_as_red) { 73 nir_variable *frag_coord = 74 nir_variable_create(b.shader, nir_var_shader_in, 75 glsl_vec4_type(), "gl_FragCoord"); 76 frag_coord->data.location = VARYING_SLOT_POS; 77 78 nir_ssa_def *pos = nir_f2i32(&b, nir_load_var(&b, frag_coord)); 79 nir_ssa_def *comp = nir_umod(&b, nir_channel(&b, pos, 0), 80 nir_imm_int(&b, 3)); 81 nir_ssa_def *color_component = 82 nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 0)), 83 nir_channel(&b, color, 0), 84 nir_bcsel(&b, nir_ieq(&b, comp, nir_imm_int(&b, 1)), 85 nir_channel(&b, color, 1), 86 nir_channel(&b, color, 2))); 87 88 nir_ssa_def *u = 
nir_ssa_undef(&b, 1, 32); 89 color = nir_vec4(&b, color_component, u, u, u); 90 } 91 92 nir_variable *frag_color = nir_variable_create(b.shader, nir_var_shader_out, 93 glsl_vec4_type(), 94 "gl_FragColor"); 95 frag_color->data.location = FRAG_RESULT_COLOR; 96 nir_store_var(&b, frag_color, color, 0xf); 97 98 struct brw_wm_prog_key wm_key; 99 brw_blorp_init_wm_prog_key(&wm_key); 100 101 struct brw_wm_prog_data prog_data; 102 const unsigned *program = 103 blorp_compile_fs(blorp, mem_ctx, b.shader, &wm_key, use_replicated_data, 104 &prog_data); 105 106 bool result = 107 blorp->upload_shader(batch, &blorp_key, sizeof(blorp_key), 108 program, prog_data.base.program_size, 109 &prog_data.base, sizeof(prog_data), 110 ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); 111 112 ralloc_free(mem_ctx); 113 return result; 114} 115 116struct layer_offset_vs_key { 117 enum blorp_shader_type shader_type; 118 unsigned num_inputs; 119}; 120 121/* In the case of doing attachment clears, we are using a surface state that 122 * is handed to us so we can't set (and don't even know) the base array layer. 123 * In order to do a layered clear in this scenario, we need some way of adding 124 * the base array layer to the instance id. Unfortunately, our hardware has 125 * no real concept of "base instance", so we have to do it manually in a 126 * vertex shader. 
127 */ 128static bool 129blorp_params_get_layer_offset_vs(struct blorp_batch *batch, 130 struct blorp_params *params) 131{ 132 struct blorp_context *blorp = batch->blorp; 133 struct layer_offset_vs_key blorp_key = { 134 .shader_type = BLORP_SHADER_TYPE_LAYER_OFFSET_VS, 135 }; 136 137 if (params->wm_prog_data) 138 blorp_key.num_inputs = params->wm_prog_data->num_varying_inputs; 139 140 if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), 141 ¶ms->vs_prog_kernel, ¶ms->vs_prog_data)) 142 return true; 143 144 void *mem_ctx = ralloc_context(NULL); 145 146 nir_builder b; 147 blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_VERTEX, "BLORP-layer-offset-vs"); 148 149 const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4); 150 151 /* First we deal with the header which has instance and base instance */ 152 nir_variable *a_header = nir_variable_create(b.shader, nir_var_shader_in, 153 uvec4_type, "header"); 154 a_header->data.location = VERT_ATTRIB_GENERIC0; 155 156 nir_variable *v_layer = nir_variable_create(b.shader, nir_var_shader_out, 157 glsl_int_type(), "layer_id"); 158 v_layer->data.location = VARYING_SLOT_LAYER; 159 160 /* Compute the layer id */ 161 nir_ssa_def *header = nir_load_var(&b, a_header); 162 nir_ssa_def *base_layer = nir_channel(&b, header, 0); 163 nir_ssa_def *instance = nir_channel(&b, header, 1); 164 nir_store_var(&b, v_layer, nir_iadd(&b, instance, base_layer), 0x1); 165 166 /* Then we copy the vertex from the next slot to VARYING_SLOT_POS */ 167 nir_variable *a_vertex = nir_variable_create(b.shader, nir_var_shader_in, 168 glsl_vec4_type(), "a_vertex"); 169 a_vertex->data.location = VERT_ATTRIB_GENERIC1; 170 171 nir_variable *v_pos = nir_variable_create(b.shader, nir_var_shader_out, 172 glsl_vec4_type(), "v_pos"); 173 v_pos->data.location = VARYING_SLOT_POS; 174 175 nir_copy_var(&b, v_pos, a_vertex); 176 177 /* Then we copy everything else */ 178 for (unsigned i = 0; i < blorp_key.num_inputs; i++) { 179 nir_variable *a_in = 
nir_variable_create(b.shader, nir_var_shader_in, 180 uvec4_type, "input"); 181 a_in->data.location = VERT_ATTRIB_GENERIC2 + i; 182 183 nir_variable *v_out = nir_variable_create(b.shader, nir_var_shader_out, 184 uvec4_type, "output"); 185 v_out->data.location = VARYING_SLOT_VAR0 + i; 186 187 nir_copy_var(&b, v_out, a_in); 188 } 189 190 struct brw_vs_prog_data vs_prog_data; 191 memset(&vs_prog_data, 0, sizeof(vs_prog_data)); 192 193 const unsigned *program = 194 blorp_compile_vs(blorp, mem_ctx, b.shader, &vs_prog_data); 195 196 bool result = 197 blorp->upload_shader(batch, &blorp_key, sizeof(blorp_key), 198 program, vs_prog_data.base.base.program_size, 199 &vs_prog_data.base.base, sizeof(vs_prog_data), 200 ¶ms->vs_prog_kernel, ¶ms->vs_prog_data); 201 202 ralloc_free(mem_ctx); 203 return result; 204} 205 206/* The x0, y0, x1, and y1 parameters must already be populated with the render 207 * area of the framebuffer to be cleared. 208 */ 209static void 210get_fast_clear_rect(const struct isl_device *dev, 211 const struct isl_surf *aux_surf, 212 unsigned *x0, unsigned *y0, 213 unsigned *x1, unsigned *y1) 214{ 215 unsigned int x_align, y_align; 216 unsigned int x_scaledown, y_scaledown; 217 218 /* Only single sampled surfaces need to (and actually can) be resolved. */ 219 if (aux_surf->usage == ISL_SURF_USAGE_CCS_BIT) { 220 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 221 * Target(s)", beneath the "Fast Color Clear" bullet (p327): 222 * 223 * Clear pass must have a clear rectangle that must follow 224 * alignment rules in terms of pixels and lines as shown in the 225 * table below. Further, the clear-rectangle height and width 226 * must be multiple of the following dimensions. If the height 227 * and width of the render target being cleared do not meet these 228 * requirements, an MCS buffer can be created such that it 229 * follows the requirement and covers the RT. 
230 * 231 * The alignment size in the table that follows is related to the 232 * alignment size that is baked into the CCS surface format but with X 233 * alignment multiplied by 16 and Y alignment multiplied by 32. 234 */ 235 x_align = isl_format_get_layout(aux_surf->format)->bw; 236 y_align = isl_format_get_layout(aux_surf->format)->bh; 237 238 x_align *= 16; 239 240 /* SKL+ line alignment requirement for Y-tiled are half those of the prior 241 * generations. 242 */ 243 if (dev->info->gen >= 9) 244 y_align *= 16; 245 else 246 y_align *= 32; 247 248 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 249 * Target(s)", beneath the "Fast Color Clear" bullet (p327): 250 * 251 * In order to optimize the performance MCS buffer (when bound to 252 * 1X RT) clear similarly to MCS buffer clear for MSRT case, 253 * clear rect is required to be scaled by the following factors 254 * in the horizontal and vertical directions: 255 * 256 * The X and Y scale down factors in the table that follows are each 257 * equal to half the alignment value computed above. 258 */ 259 x_scaledown = x_align / 2; 260 y_scaledown = y_align / 2; 261 262 /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel 263 * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color 264 * Clear of Non-MultiSampled Render Target Restrictions": 265 * 266 * Clear rectangle must be aligned to two times the number of 267 * pixels in the table shown below due to 16x16 hashing across the 268 * slice. 269 */ 270 x_align *= 2; 271 y_align *= 2; 272 } else { 273 assert(aux_surf->usage == ISL_SURF_USAGE_MCS_BIT); 274 275 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render 276 * Target(s)", beneath the "MSAA Compression" bullet (p326): 277 * 278 * Clear pass for this case requires that scaled down primitive 279 * is sent down with upper left co-ordinate to coincide with 280 * actual rectangle being cleared. 
For MSAA, clear rectangle’s 281 * height and width need to as show in the following table in 282 * terms of (width,height) of the RT. 283 * 284 * MSAA Width of Clear Rect Height of Clear Rect 285 * 2X Ceil(1/8*width) Ceil(1/2*height) 286 * 4X Ceil(1/8*width) Ceil(1/2*height) 287 * 8X Ceil(1/2*width) Ceil(1/2*height) 288 * 16X width Ceil(1/2*height) 289 * 290 * The text "with upper left co-ordinate to coincide with actual 291 * rectangle being cleared" is a little confusing--it seems to imply 292 * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to 293 * feed the pipeline using the rectangle (x,y) to 294 * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on 295 * the number of samples. Experiments indicate that this is not 296 * quite correct; actually, what the hardware appears to do is to 297 * align whatever rectangle is sent down the pipeline to the nearest 298 * multiple of 2x2 blocks, and then scale it up by a factor of N 299 * horizontally and 2 vertically. So the resulting alignment is 4 300 * vertically and either 4 or 16 horizontally, and the scaledown 301 * factor is 2 vertically and either 2 or 8 horizontally. 
302 */ 303 switch (aux_surf->format) { 304 case ISL_FORMAT_MCS_2X: 305 case ISL_FORMAT_MCS_4X: 306 x_scaledown = 8; 307 break; 308 case ISL_FORMAT_MCS_8X: 309 x_scaledown = 2; 310 break; 311 case ISL_FORMAT_MCS_16X: 312 x_scaledown = 1; 313 break; 314 default: 315 unreachable("Unexpected MCS format for fast clear"); 316 } 317 y_scaledown = 2; 318 x_align = x_scaledown * 2; 319 y_align = y_scaledown * 2; 320 } 321 322 *x0 = ROUND_DOWN_TO(*x0, x_align) / x_scaledown; 323 *y0 = ROUND_DOWN_TO(*y0, y_align) / y_scaledown; 324 *x1 = ALIGN(*x1, x_align) / x_scaledown; 325 *y1 = ALIGN(*y1, y_align) / y_scaledown; 326} 327 328void 329blorp_fast_clear(struct blorp_batch *batch, 330 const struct blorp_surf *surf, enum isl_format format, 331 uint32_t level, uint32_t start_layer, uint32_t num_layers, 332 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1) 333{ 334 /* Ensure that all layers undergoing the clear have an auxiliary buffer. */ 335 assert(start_layer + num_layers <= 336 MAX2(surf->aux_surf->logical_level0_px.depth >> level, 337 surf->aux_surf->logical_level0_px.array_len)); 338 339 struct blorp_params params; 340 blorp_params_init(¶ms); 341 params.num_layers = num_layers; 342 343 params.x0 = x0; 344 params.y0 = y0; 345 params.x1 = x1; 346 params.y1 = y1; 347 348 memset(¶ms.wm_inputs.clear_color, 0xff, 4*sizeof(float)); 349 params.fast_clear_op = ISL_AUX_OP_FAST_CLEAR; 350 351 get_fast_clear_rect(batch->blorp->isl_dev, surf->aux_surf, 352 ¶ms.x0, ¶ms.y0, ¶ms.x1, ¶ms.y1); 353 354 if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) 355 return; 356 357 brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, level, 358 start_layer, format, true); 359 params.num_samples = params.dst.surf.samples; 360 361 batch->blorp->exec(batch, ¶ms); 362} 363 364union isl_color_value 365swizzle_color_value(union isl_color_value src, struct isl_swizzle swizzle) 366{ 367 union isl_color_value dst = { .u32 = { 0, } }; 368 369 /* We assign colors in ABGR order so that the first one 
will be taken in 370 * RGBA precedence order. According to the PRM docs for shader channel 371 * select, this matches Haswell hardware behavior. 372 */ 373 if ((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4) 374 dst.u32[swizzle.a - ISL_CHANNEL_SELECT_RED] = src.u32[3]; 375 if ((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4) 376 dst.u32[swizzle.b - ISL_CHANNEL_SELECT_RED] = src.u32[2]; 377 if ((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4) 378 dst.u32[swizzle.g - ISL_CHANNEL_SELECT_RED] = src.u32[1]; 379 if ((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4) 380 dst.u32[swizzle.r - ISL_CHANNEL_SELECT_RED] = src.u32[0]; 381 382 return dst; 383} 384 385void 386blorp_clear(struct blorp_batch *batch, 387 const struct blorp_surf *surf, 388 enum isl_format format, struct isl_swizzle swizzle, 389 uint32_t level, uint32_t start_layer, uint32_t num_layers, 390 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, 391 union isl_color_value clear_color, 392 const bool color_write_disable[4]) 393{ 394 struct blorp_params params; 395 blorp_params_init(¶ms); 396 397 /* Manually apply the clear destination swizzle. This way swizzled clears 398 * will work for swizzles which we can't normally use for rendering and it 399 * also ensures that they work on pre-Haswell hardware which can't swizlle 400 * at all. 401 */ 402 clear_color = swizzle_color_value(clear_color, swizzle); 403 swizzle = ISL_SWIZZLE_IDENTITY; 404 405 bool clear_rgb_as_red = false; 406 if (format == ISL_FORMAT_R9G9B9E5_SHAREDEXP) { 407 clear_color.u32[0] = float3_to_rgb9e5(clear_color.f32); 408 format = ISL_FORMAT_R32_UINT; 409 } else if (format == ISL_FORMAT_L8_UNORM_SRGB) { 410 clear_color.f32[0] = util_format_linear_to_srgb_float(clear_color.f32[0]); 411 format = ISL_FORMAT_R8_UNORM; 412 } else if (format == ISL_FORMAT_A4B4G4R4_UNORM) { 413 /* Broadwell and earlier cannot render to this format so we need to work 414 * around it by swapping the colors around and using B4G4R4A4 instead. 
415 */ 416 const struct isl_swizzle ARGB = ISL_SWIZZLE(ALPHA, RED, GREEN, BLUE); 417 clear_color = swizzle_color_value(clear_color, ARGB); 418 format = ISL_FORMAT_B4G4R4A4_UNORM; 419 } else if (isl_format_get_layout(format)->bpb % 3 == 0) { 420 clear_rgb_as_red = true; 421 if (format == ISL_FORMAT_R8G8B8_UNORM_SRGB) { 422 clear_color.f32[0] = util_format_linear_to_srgb_float(clear_color.f32[0]); 423 clear_color.f32[1] = util_format_linear_to_srgb_float(clear_color.f32[1]); 424 clear_color.f32[2] = util_format_linear_to_srgb_float(clear_color.f32[2]); 425 } 426 } 427 428 memcpy(¶ms.wm_inputs.clear_color, clear_color.f32, sizeof(float) * 4); 429 430 bool use_simd16_replicated_data = true; 431 432 /* From the SNB PRM (Vol4_Part1): 433 * 434 * "Replicated data (Message Type = 111) is only supported when 435 * accessing tiled memory. Using this Message Type to access linear 436 * (untiled) memory is UNDEFINED." 437 */ 438 if (surf->surf->tiling == ISL_TILING_LINEAR) 439 use_simd16_replicated_data = false; 440 441 /* Replicated clears don't work yet before gen6 */ 442 if (batch->blorp->isl_dev->info->gen < 6) 443 use_simd16_replicated_data = false; 444 445 /* Constant color writes ignore everyting in blend and color calculator 446 * state. This is not documented. 
447 */ 448 if (color_write_disable) { 449 for (unsigned i = 0; i < 4; i++) { 450 params.color_write_disable[i] = color_write_disable[i]; 451 if (color_write_disable[i]) 452 use_simd16_replicated_data = false; 453 } 454 } 455 456 if (!blorp_params_get_clear_kernel(batch, ¶ms, 457 use_simd16_replicated_data, 458 clear_rgb_as_red)) 459 return; 460 461 if (!blorp_ensure_sf_program(batch, ¶ms)) 462 return; 463 464 while (num_layers > 0) { 465 brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, level, 466 start_layer, format, true); 467 params.dst.view.swizzle = swizzle; 468 469 params.x0 = x0; 470 params.y0 = y0; 471 params.x1 = x1; 472 params.y1 = y1; 473 474 if (params.dst.tile_x_sa || params.dst.tile_y_sa) { 475 assert(params.dst.surf.samples == 1); 476 assert(num_layers == 1); 477 params.x0 += params.dst.tile_x_sa; 478 params.y0 += params.dst.tile_y_sa; 479 params.x1 += params.dst.tile_x_sa; 480 params.y1 += params.dst.tile_y_sa; 481 } 482 483 /* The MinLOD and MinimumArrayElement don't work properly for cube maps. 484 * Convert them to a single slice on gen4. 485 */ 486 if (batch->blorp->isl_dev->info->gen == 4 && 487 (params.dst.surf.usage & ISL_SURF_USAGE_CUBE_BIT)) { 488 blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, ¶ms.dst); 489 } 490 491 if (clear_rgb_as_red) { 492 surf_fake_rgb_with_red(batch->blorp->isl_dev, ¶ms.dst); 493 params.x0 *= 3; 494 params.x1 *= 3; 495 } 496 497 if (isl_format_is_compressed(params.dst.surf.format)) { 498 blorp_surf_convert_to_uncompressed(batch->blorp->isl_dev, ¶ms.dst, 499 NULL, NULL, NULL, NULL); 500 //&dst_x, &dst_y, &dst_w, &dst_h); 501 } 502 503 if (params.dst.tile_x_sa || params.dst.tile_y_sa) { 504 /* Either we're on gen4 where there is no multisampling or the 505 * surface is compressed which also implies no multisampling. 506 * Therefore, sa == px and we don't need to do a conversion. 
507 */ 508 assert(params.dst.surf.samples == 1); 509 params.x0 += params.dst.tile_x_sa; 510 params.y0 += params.dst.tile_y_sa; 511 params.x1 += params.dst.tile_x_sa; 512 params.y1 += params.dst.tile_y_sa; 513 } 514 515 params.num_samples = params.dst.surf.samples; 516 517 /* We may be restricted on the number of layers we can bind at any one 518 * time. In particular, Sandy Bridge has a maximum number of layers of 519 * 512 but a maximum 3D texture size is much larger. 520 */ 521 params.num_layers = MIN2(params.dst.view.array_len, num_layers); 522 523 const unsigned max_image_width = 16 * 1024; 524 if (params.dst.surf.logical_level0_px.width > max_image_width) { 525 /* Clearing an RGB image as red multiplies the surface width by 3 526 * so it may now be too wide for the hardware surface limits. We 527 * have to break the clear up into pieces in order to clear wide 528 * images. 529 */ 530 assert(clear_rgb_as_red); 531 assert(params.dst.surf.dim == ISL_SURF_DIM_2D); 532 assert(params.dst.surf.tiling == ISL_TILING_LINEAR); 533 assert(params.dst.surf.logical_level0_px.depth == 1); 534 assert(params.dst.surf.logical_level0_px.array_len == 1); 535 assert(params.dst.surf.levels == 1); 536 assert(params.dst.surf.samples == 1); 537 assert(params.dst.tile_x_sa == 0 || params.dst.tile_y_sa == 0); 538 assert(params.dst.aux_usage == ISL_AUX_USAGE_NONE); 539 540 /* max_image_width rounded down to a multiple of 3 */ 541 const unsigned max_fake_rgb_width = (max_image_width / 3) * 3; 542 const unsigned cpp = 543 isl_format_get_layout(params.dst.surf.format)->bpb / 8; 544 545 params.dst.surf.logical_level0_px.width = max_fake_rgb_width; 546 params.dst.surf.phys_level0_sa.width = max_fake_rgb_width; 547 548 uint32_t orig_x0 = params.x0, orig_x1 = params.x1; 549 uint64_t orig_offset = params.dst.addr.offset; 550 for (uint32_t x = orig_x0; x < orig_x1; x += max_fake_rgb_width) { 551 /* Offset to the surface. 
It's easy because we're linear */ 552 params.dst.addr.offset = orig_offset + x * cpp; 553 554 params.x0 = 0; 555 params.x1 = MIN2(orig_x1 - x, max_image_width); 556 557 batch->blorp->exec(batch, ¶ms); 558 } 559 } else { 560 batch->blorp->exec(batch, ¶ms); 561 } 562 563 start_layer += params.num_layers; 564 num_layers -= params.num_layers; 565 } 566} 567 568void 569blorp_clear_depth_stencil(struct blorp_batch *batch, 570 const struct blorp_surf *depth, 571 const struct blorp_surf *stencil, 572 uint32_t level, uint32_t start_layer, 573 uint32_t num_layers, 574 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, 575 bool clear_depth, float depth_value, 576 uint8_t stencil_mask, uint8_t stencil_value) 577{ 578 struct blorp_params params; 579 blorp_params_init(¶ms); 580 581 params.x0 = x0; 582 params.y0 = y0; 583 params.x1 = x1; 584 params.y1 = y1; 585 586 if (ISL_DEV_GEN(batch->blorp->isl_dev) == 6) { 587 /* For some reason, Sandy Bridge gets occlusion queries wrong if we 588 * don't have a shader. In particular, it records samples even though 589 * we disable statistics in 3DSTATE_WM. Give it the usual clear shader 590 * to work around the issue. 591 */ 592 if (!blorp_params_get_clear_kernel(batch, ¶ms, false, false)) 593 return; 594 } 595 596 while (num_layers > 0) { 597 params.num_layers = num_layers; 598 599 if (stencil_mask) { 600 brw_blorp_surface_info_init(batch->blorp, ¶ms.stencil, stencil, 601 level, start_layer, 602 ISL_FORMAT_UNSUPPORTED, true); 603 params.stencil_mask = stencil_mask; 604 params.stencil_ref = stencil_value; 605 606 params.dst.surf.samples = params.stencil.surf.samples; 607 params.dst.surf.logical_level0_px = 608 params.stencil.surf.logical_level0_px; 609 params.dst.view = params.depth.view; 610 611 params.num_samples = params.stencil.surf.samples; 612 613 /* We may be restricted on the number of layers we can bind at any 614 * one time. 
In particular, Sandy Bridge has a maximum number of 615 * layers of 512 but a maximum 3D texture size is much larger. 616 */ 617 if (params.stencil.view.array_len < params.num_layers) 618 params.num_layers = params.stencil.view.array_len; 619 } 620 621 if (clear_depth) { 622 brw_blorp_surface_info_init(batch->blorp, ¶ms.depth, depth, 623 level, start_layer, 624 ISL_FORMAT_UNSUPPORTED, true); 625 params.z = depth_value; 626 params.depth_format = 627 isl_format_get_depth_format(depth->surf->format, false); 628 629 params.dst.surf.samples = params.depth.surf.samples; 630 params.dst.surf.logical_level0_px = 631 params.depth.surf.logical_level0_px; 632 params.dst.view = params.depth.view; 633 634 params.num_samples = params.depth.surf.samples; 635 636 /* We may be restricted on the number of layers we can bind at any 637 * one time. In particular, Sandy Bridge has a maximum number of 638 * layers of 512 but a maximum 3D texture size is much larger. 639 */ 640 if (params.depth.view.array_len < params.num_layers) 641 params.num_layers = params.depth.view.array_len; 642 } 643 644 batch->blorp->exec(batch, ¶ms); 645 646 start_layer += params.num_layers; 647 num_layers -= params.num_layers; 648 } 649} 650 651bool 652blorp_can_hiz_clear_depth(uint8_t gen, enum isl_format format, 653 uint32_t num_samples, 654 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1) 655{ 656 /* This function currently doesn't support any gen prior to gen8 */ 657 assert(gen >= 8); 658 659 if (gen == 8 && format == ISL_FORMAT_R16_UNORM) { 660 /* Apply the D16 alignment restrictions. On BDW, HiZ has an 8x4 sample 661 * block with the following property: as the number of samples increases, 662 * the number of pixels representable by this block decreases by a factor 663 * of the sample dimensions. Sample dimensions scale following the MSAA 664 * interleaved pattern. 
665 * 666 * Sample|Sample|Pixel 667 * Count |Dim |Dim 668 * =================== 669 * 1 | 1x1 | 8x4 670 * 2 | 2x1 | 4x4 671 * 4 | 2x2 | 4x2 672 * 8 | 4x2 | 2x2 673 * 16 | 4x4 | 2x1 674 * 675 * Table: Pixel Dimensions in a HiZ Sample Block Pre-SKL 676 */ 677 const struct isl_extent2d sa_block_dim = 678 isl_get_interleaved_msaa_px_size_sa(num_samples); 679 const uint8_t align_px_w = 8 / sa_block_dim.w; 680 const uint8_t align_px_h = 4 / sa_block_dim.h; 681 682 /* Fast depth clears clear an entire sample block at a time. As a result, 683 * the rectangle must be aligned to the dimensions of the encompassing 684 * pixel block for a successful operation. 685 * 686 * Fast clears can still work if the upper-left corner is aligned and the 687 * bottom-rigtht corner touches the edge of a depth buffer whose extent 688 * is unaligned. This is because each miplevel in the depth buffer is 689 * padded by the Pixel Dim (similar to a standard compressed texture). 690 * In this case, the clear rectangle could be padded by to match the full 691 * depth buffer extent but to support multiple clearing techniques, we 692 * chose to be unaware of the depth buffer's extent and thus don't handle 693 * this case. 
694 */ 695 if (x0 % align_px_w || y0 % align_px_h || 696 x1 % align_px_w || y1 % align_px_h) 697 return false; 698 } 699 return true; 700} 701 702void 703blorp_hiz_clear_depth_stencil(struct blorp_batch *batch, 704 const struct blorp_surf *depth, 705 const struct blorp_surf *stencil, 706 uint32_t level, 707 uint32_t start_layer, uint32_t num_layers, 708 uint32_t x0, uint32_t y0, 709 uint32_t x1, uint32_t y1, 710 bool clear_depth, float depth_value, 711 bool clear_stencil, uint8_t stencil_value) 712{ 713 struct blorp_params params; 714 blorp_params_init(¶ms); 715 716 /* This requires WM_HZ_OP which only exists on gen8+ */ 717 assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 8); 718 719 params.hiz_op = ISL_AUX_OP_FAST_CLEAR; 720 params.num_layers = 1; 721 722 params.x0 = x0; 723 params.y0 = y0; 724 params.x1 = x1; 725 params.y1 = y1; 726 727 for (uint32_t l = 0; l < num_layers; l++) { 728 const uint32_t layer = start_layer + l; 729 if (clear_stencil) { 730 brw_blorp_surface_info_init(batch->blorp, ¶ms.stencil, stencil, 731 level, layer, 732 ISL_FORMAT_UNSUPPORTED, true); 733 params.stencil_mask = 0xff; 734 params.stencil_ref = stencil_value; 735 params.num_samples = params.stencil.surf.samples; 736 } 737 738 if (clear_depth) { 739 /* If we're clearing depth, we must have HiZ */ 740 assert(depth && depth->aux_usage == ISL_AUX_USAGE_HIZ); 741 742 brw_blorp_surface_info_init(batch->blorp, ¶ms.depth, depth, 743 level, layer, 744 ISL_FORMAT_UNSUPPORTED, true); 745 params.depth.clear_color.f32[0] = depth_value; 746 params.depth_format = 747 isl_format_get_depth_format(depth->surf->format, false); 748 params.num_samples = params.depth.surf.samples; 749 } 750 751 batch->blorp->exec(batch, ¶ms); 752 } 753} 754 755/* Given a depth stencil attachment, this function performs a fast depth clear 756 * on a depth portion and a regular clear on the stencil portion. 
When 757 * performing a fast depth clear on the depth portion, the HiZ buffer is simply 758 * tagged as cleared so the depth clear value is not actually needed. 759 */ 760void 761blorp_gen8_hiz_clear_attachments(struct blorp_batch *batch, 762 uint32_t num_samples, 763 uint32_t x0, uint32_t y0, 764 uint32_t x1, uint32_t y1, 765 bool clear_depth, bool clear_stencil, 766 uint8_t stencil_value) 767{ 768 assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); 769 770 struct blorp_params params; 771 blorp_params_init(¶ms); 772 params.num_layers = 1; 773 params.hiz_op = ISL_AUX_OP_FAST_CLEAR; 774 params.x0 = x0; 775 params.y0 = y0; 776 params.x1 = x1; 777 params.y1 = y1; 778 params.num_samples = num_samples; 779 params.depth.enabled = clear_depth; 780 params.stencil.enabled = clear_stencil; 781 params.stencil_ref = stencil_value; 782 batch->blorp->exec(batch, ¶ms); 783} 784 785/** Clear active color/depth/stencili attachments 786 * 787 * This function performs a clear operation on the currently bound 788 * color/depth/stencil attachments. It is assumed that any information passed 789 * in here is valid, consistent, and in-bounds relative to the currently 790 * attached depth/stencil. The binding_table_offset parameter is the 32-bit 791 * offset relative to surface state base address where pre-baked binding table 792 * that we are to use lives. If clear_color is false, binding_table_offset 793 * must point to a binding table with one entry which is a valid null surface 794 * that matches the currently bound depth and stencil. 
795 */ 796void 797blorp_clear_attachments(struct blorp_batch *batch, 798 uint32_t binding_table_offset, 799 enum isl_format depth_format, 800 uint32_t num_samples, 801 uint32_t start_layer, uint32_t num_layers, 802 uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1, 803 bool clear_color, union isl_color_value color_value, 804 bool clear_depth, float depth_value, 805 uint8_t stencil_mask, uint8_t stencil_value) 806{ 807 struct blorp_params params; 808 blorp_params_init(¶ms); 809 810 assert(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); 811 812 params.x0 = x0; 813 params.y0 = y0; 814 params.x1 = x1; 815 params.y1 = y1; 816 817 params.use_pre_baked_binding_table = true; 818 params.pre_baked_binding_table_offset = binding_table_offset; 819 820 params.num_layers = num_layers; 821 params.num_samples = num_samples; 822 823 if (clear_color) { 824 params.dst.enabled = true; 825 826 memcpy(¶ms.wm_inputs.clear_color, color_value.f32, sizeof(float) * 4); 827 828 /* Unfortunately, without knowing whether or not our destination surface 829 * is tiled or not, we have to assume it may be linear. This means no 830 * SIMD16_REPDATA for us. 
:-( 831 */ 832 if (!blorp_params_get_clear_kernel(batch, ¶ms, false, false)) 833 return; 834 } 835 836 if (clear_depth) { 837 params.depth.enabled = true; 838 839 params.z = depth_value; 840 params.depth_format = isl_format_get_depth_format(depth_format, false); 841 } 842 843 if (stencil_mask) { 844 params.stencil.enabled = true; 845 846 params.stencil_mask = stencil_mask; 847 params.stencil_ref = stencil_value; 848 } 849 850 if (!blorp_params_get_layer_offset_vs(batch, ¶ms)) 851 return; 852 853 params.vs_inputs.base_layer = start_layer; 854 855 batch->blorp->exec(batch, ¶ms); 856} 857 858void 859blorp_ccs_resolve(struct blorp_batch *batch, 860 struct blorp_surf *surf, uint32_t level, 861 uint32_t start_layer, uint32_t num_layers, 862 enum isl_format format, 863 enum isl_aux_op resolve_op) 864{ 865 struct blorp_params params; 866 867 blorp_params_init(¶ms); 868 brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, 869 level, start_layer, format, true); 870 871 /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve": 872 * 873 * A rectangle primitive must be scaled down by the following factors 874 * with respect to render target being resolved. 875 * 876 * The scaledown factors in the table that follows are related to the block 877 * size of the CCS format. For IVB and HSW, we divide by two, for BDW we 878 * multiply by 8 and 16. On Sky Lake, we multiply by 8. 
879 */ 880 const struct isl_format_layout *aux_fmtl = 881 isl_format_get_layout(params.dst.aux_surf.format); 882 assert(aux_fmtl->txc == ISL_TXC_CCS); 883 884 unsigned x_scaledown, y_scaledown; 885 if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 9) { 886 x_scaledown = aux_fmtl->bw * 8; 887 y_scaledown = aux_fmtl->bh * 8; 888 } else if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) { 889 x_scaledown = aux_fmtl->bw * 8; 890 y_scaledown = aux_fmtl->bh * 16; 891 } else { 892 x_scaledown = aux_fmtl->bw / 2; 893 y_scaledown = aux_fmtl->bh / 2; 894 } 895 params.x0 = params.y0 = 0; 896 params.x1 = minify(params.dst.aux_surf.logical_level0_px.width, level); 897 params.y1 = minify(params.dst.aux_surf.logical_level0_px.height, level); 898 params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown; 899 params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown; 900 901 if (batch->blorp->isl_dev->info->gen >= 9) { 902 assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE || 903 resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE); 904 } else { 905 /* Broadwell and earlier do not have a partial resolve */ 906 assert(resolve_op == ISL_AUX_OP_FULL_RESOLVE); 907 } 908 params.fast_clear_op = resolve_op; 909 params.num_layers = num_layers; 910 911 /* Note: there is no need to initialize push constants because it doesn't 912 * matter what data gets dispatched to the render target. However, we must 913 * ensure that the fragment shader delivers the data using the "replicated 914 * color" message. 
915 */ 916 917 if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) 918 return; 919 920 batch->blorp->exec(batch, ¶ms); 921} 922 923static nir_ssa_def * 924blorp_nir_bit(nir_builder *b, nir_ssa_def *src, unsigned bit) 925{ 926 return nir_iand(b, nir_ushr(b, src, nir_imm_int(b, bit)), 927 nir_imm_int(b, 1)); 928} 929 930struct blorp_mcs_partial_resolve_key 931{ 932 enum blorp_shader_type shader_type; 933 bool indirect_clear_color; 934 bool int_format; 935 uint32_t num_samples; 936}; 937 938static bool 939blorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch *batch, 940 struct blorp_params *params) 941{ 942 struct blorp_context *blorp = batch->blorp; 943 const struct blorp_mcs_partial_resolve_key blorp_key = { 944 .shader_type = BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE, 945 .indirect_clear_color = params->dst.clear_color_addr.buffer != NULL, 946 .int_format = isl_format_has_int_channel(params->dst.view.format), 947 .num_samples = params->num_samples, 948 }; 949 950 if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key), 951 ¶ms->wm_prog_kernel, ¶ms->wm_prog_data)) 952 return true; 953 954 void *mem_ctx = ralloc_context(NULL); 955 956 nir_builder b; 957 blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, 958 "BLORP-mcs-partial-resolve"); 959 960 nir_variable *v_color = 961 BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type()); 962 963 nir_variable *frag_color = 964 nir_variable_create(b.shader, nir_var_shader_out, 965 glsl_vec4_type(), "gl_FragColor"); 966 frag_color->data.location = FRAG_RESULT_COLOR; 967 968 /* Do an MCS fetch and check if it is equal to the magic clear value */ 969 nir_ssa_def *mcs = 970 blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, blorp_nir_frag_coord(&b)), 971 nir_load_layer_id(&b)); 972 nir_ssa_def *is_clear = 973 blorp_nir_mcs_is_clear_color(&b, mcs, blorp_key.num_samples); 974 975 /* If we aren't the clear value, discard. 
*/ 976 nir_intrinsic_instr *discard = 977 nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if); 978 discard->src[0] = nir_src_for_ssa(nir_inot(&b, is_clear)); 979 nir_builder_instr_insert(&b, &discard->instr); 980 981 nir_ssa_def *clear_color = nir_load_var(&b, v_color); 982 if (blorp_key.indirect_clear_color && blorp->isl_dev->info->gen <= 8) { 983 /* Gen7-8 clear colors are stored as single 0/1 bits */ 984 clear_color = nir_vec4(&b, blorp_nir_bit(&b, clear_color, 31), 985 blorp_nir_bit(&b, clear_color, 30), 986 blorp_nir_bit(&b, clear_color, 29), 987 blorp_nir_bit(&b, clear_color, 28)); 988 989 if (!blorp_key.int_format) 990 clear_color = nir_i2f32(&b, clear_color); 991 } 992 nir_store_var(&b, frag_color, clear_color, 0xf); 993 994 struct brw_wm_prog_key wm_key; 995 brw_blorp_init_wm_prog_key(&wm_key); 996 wm_key.tex.compressed_multisample_layout_mask = 1; 997 wm_key.tex.msaa_16 = blorp_key.num_samples == 16; 998 wm_key.multisample_fbo = true; 999 1000 struct brw_wm_prog_data prog_data; 1001 const unsigned *program = 1002 blorp_compile_fs(blorp, mem_ctx, b.shader, &wm_key, false, 1003 &prog_data); 1004 1005 bool result = 1006 blorp->upload_shader(batch, &blorp_key, sizeof(blorp_key), 1007 program, prog_data.base.program_size, 1008 &prog_data.base, sizeof(prog_data), 1009 ¶ms->wm_prog_kernel, ¶ms->wm_prog_data); 1010 1011 ralloc_free(mem_ctx); 1012 return result; 1013} 1014 1015void 1016blorp_mcs_partial_resolve(struct blorp_batch *batch, 1017 struct blorp_surf *surf, 1018 enum isl_format format, 1019 uint32_t start_layer, uint32_t num_layers) 1020{ 1021 struct blorp_params params; 1022 blorp_params_init(¶ms); 1023 1024 assert(batch->blorp->isl_dev->info->gen >= 7); 1025 1026 params.x0 = 0; 1027 params.y0 = 0; 1028 params.x1 = surf->surf->logical_level0_px.width; 1029 params.y1 = surf->surf->logical_level0_px.height; 1030 1031 brw_blorp_surface_info_init(batch->blorp, ¶ms.src, surf, 0, 1032 start_layer, format, false); 1033 
brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, surf, 0, 1034 start_layer, format, true); 1035 1036 params.num_samples = params.dst.surf.samples; 1037 params.num_layers = num_layers; 1038 params.dst_clear_color_as_input = surf->clear_color_addr.buffer != NULL; 1039 1040 memcpy(¶ms.wm_inputs.clear_color, 1041 surf->clear_color.f32, sizeof(float) * 4); 1042 1043 if (!blorp_params_get_mcs_partial_resolve_kernel(batch, ¶ms)) 1044 return; 1045 1046 batch->blorp->exec(batch, ¶ms); 1047} 1048 1049/** Clear a CCS to the "uncompressed" state 1050 * 1051 * This pass is the CCS equivalent of a "HiZ resolve". It sets the CCS values 1052 * for a given layer/level of a surface to 0x0 which is the "uncompressed" 1053 * state which tells the sampler to go look at the main surface. 1054 */ 1055void 1056blorp_ccs_ambiguate(struct blorp_batch *batch, 1057 struct blorp_surf *surf, 1058 uint32_t level, uint32_t layer) 1059{ 1060 struct blorp_params params; 1061 blorp_params_init(¶ms); 1062 1063 assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7); 1064 1065 const struct isl_format_layout *aux_fmtl = 1066 isl_format_get_layout(surf->aux_surf->format); 1067 assert(aux_fmtl->txc == ISL_TXC_CCS); 1068 1069 params.dst = (struct brw_blorp_surface_info) { 1070 .enabled = true, 1071 .addr = surf->aux_addr, 1072 .view = { 1073 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT, 1074 .format = ISL_FORMAT_R32G32B32A32_UINT, 1075 .base_level = 0, 1076 .base_array_layer = 0, 1077 .levels = 1, 1078 .array_len = 1, 1079 .swizzle = ISL_SWIZZLE_IDENTITY, 1080 }, 1081 }; 1082 1083 uint32_t z = 0; 1084 if (surf->surf->dim == ISL_SURF_DIM_3D) { 1085 z = layer; 1086 layer = 0; 1087 } 1088 1089 uint32_t offset_B, x_offset_el, y_offset_el; 1090 isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z, 1091 &x_offset_el, &y_offset_el); 1092 isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling, aux_fmtl->bpb, 1093 surf->aux_surf->row_pitch_B, 1094 x_offset_el, y_offset_el, 1095 &offset_B, &x_offset_el, 
&y_offset_el); 1096 params.dst.addr.offset += offset_B; 1097 1098 const uint32_t width_px = 1099 minify(surf->aux_surf->logical_level0_px.width, level); 1100 const uint32_t height_px = 1101 minify(surf->aux_surf->logical_level0_px.height, level); 1102 const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw); 1103 const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh); 1104 1105 struct isl_tile_info ccs_tile_info; 1106 isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info); 1107 1108 /* We're going to map it as a regular RGBA32_UINT surface. We need to 1109 * downscale a good deal. We start by computing the area on the CCS to 1110 * clear in units of Y-tiled cache lines. 1111 */ 1112 uint32_t x_offset_cl, y_offset_cl, width_cl, height_cl; 1113 if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) { 1114 /* From the Sky Lake PRM Vol. 12 in the section on planes: 1115 * 1116 * "The Color Control Surface (CCS) contains the compression status 1117 * of the cache-line pairs. The compression state of the cache-line 1118 * pair is specified by 2 bits in the CCS. Each CCS cache-line 1119 * represents an area on the main surface of 16x16 sets of 128 byte 1120 * Y-tiled cache-line-pairs. CCS is always Y tiled." 1121 * 1122 * Each 2-bit surface element in the CCS corresponds to a single 1123 * cache-line pair in the main surface. This means that 16x16 el block 1124 * in the CCS maps to a Y-tiled cache line. Fortunately, CCS layouts 1125 * are calculated with a very large alignment so we can round up to a 1126 * whole cache line without worrying about overdraw. 1127 */ 1128 1129 /* On Broadwell and above, a CCS tile is the same as a Y tile when 1130 * viewed at the cache-line granularity. Fortunately, the horizontal 1131 * and vertical alignment requirements of the CCS are such that we can 1132 * align to an entire cache line without worrying about crossing over 1133 * from one LOD to another. 
1134 */ 1135 const uint32_t x_el_per_cl = ccs_tile_info.logical_extent_el.w / 8; 1136 const uint32_t y_el_per_cl = ccs_tile_info.logical_extent_el.h / 8; 1137 assert(surf->aux_surf->image_alignment_el.w % x_el_per_cl == 0); 1138 assert(surf->aux_surf->image_alignment_el.h % y_el_per_cl == 0); 1139 1140 assert(x_offset_el % x_el_per_cl == 0); 1141 assert(y_offset_el % y_el_per_cl == 0); 1142 x_offset_cl = x_offset_el / x_el_per_cl; 1143 y_offset_cl = y_offset_el / y_el_per_cl; 1144 width_cl = DIV_ROUND_UP(width_el, x_el_per_cl); 1145 height_cl = DIV_ROUND_UP(height_el, y_el_per_cl); 1146 } else { 1147 /* On gen7, the CCS tiling is not so nice. However, there we are 1148 * guaranteed that we only have a single level and slice so we don't 1149 * have to worry about it and can just align to a whole tile. 1150 */ 1151 assert(surf->aux_surf->logical_level0_px.depth == 1); 1152 assert(surf->aux_surf->logical_level0_px.array_len == 1); 1153 assert(x_offset_el == 0 && y_offset_el == 0); 1154 const uint32_t width_tl = 1155 DIV_ROUND_UP(width_el, ccs_tile_info.logical_extent_el.w); 1156 const uint32_t height_tl = 1157 DIV_ROUND_UP(height_el, ccs_tile_info.logical_extent_el.h); 1158 x_offset_cl = 0; 1159 y_offset_cl = 0; 1160 width_cl = width_tl * 8; 1161 height_cl = height_tl * 8; 1162 } 1163 1164 /* We're going to use a RGBA32 format so as to write data as quickly as 1165 * possible. A y-tiled cache line will then be 1x4 px. 
1166 */ 1167 const uint32_t x_offset_rgba_px = x_offset_cl; 1168 const uint32_t y_offset_rgba_px = y_offset_cl * 4; 1169 const uint32_t width_rgba_px = width_cl; 1170 const uint32_t height_rgba_px = height_cl * 4; 1171 1172 MAYBE_UNUSED bool ok = 1173 isl_surf_init(batch->blorp->isl_dev, ¶ms.dst.surf, 1174 .dim = ISL_SURF_DIM_2D, 1175 .format = ISL_FORMAT_R32G32B32A32_UINT, 1176 .width = width_rgba_px + x_offset_rgba_px, 1177 .height = height_rgba_px + y_offset_rgba_px, 1178 .depth = 1, 1179 .levels = 1, 1180 .array_len = 1, 1181 .samples = 1, 1182 .row_pitch_B = surf->aux_surf->row_pitch_B, 1183 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT, 1184 .tiling_flags = ISL_TILING_Y0_BIT); 1185 assert(ok); 1186 1187 params.x0 = x_offset_rgba_px; 1188 params.y0 = y_offset_rgba_px; 1189 params.x1 = x_offset_rgba_px + width_rgba_px; 1190 params.y1 = y_offset_rgba_px + height_rgba_px; 1191 1192 /* A CCS value of 0 means "uncompressed." */ 1193 memset(¶ms.wm_inputs.clear_color, 0, 1194 sizeof(params.wm_inputs.clear_color)); 1195 1196 if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false)) 1197 return; 1198 1199 batch->blorp->exec(batch, ¶ms); 1200} 1201