radv_meta_clear.c revision ed98bd31
1/*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "radv_debug.h"
25#include "radv_meta.h"
26#include "radv_private.h"
27#include "nir/nir_builder.h"
28
29#include "util/format_rgb9e5.h"
30#include "vk_format.h"
31
/* Index of a depth/stencil clear pipeline variant inside the per-sample-count
 * depth_only / stencil_only / depthstencil pipeline arrays.
 */
enum {
	/* Regular clear: no DB fast-clear flag is requested on the pipeline. */
	DEPTH_CLEAR_SLOW = 0,
	/* Fast clear that leaves the "disable expclear" flags unset. */
	DEPTH_CLEAR_FAST_EXPCLEAR = 1,
	/* Fast clear that additionally sets the "disable expclear" flags. */
	DEPTH_CLEAR_FAST_NO_EXPCLEAR = 2,
};
37
38static void
39build_color_shaders(struct nir_shader **out_vs,
40                    struct nir_shader **out_fs,
41                    uint32_t frag_output)
42{
43	nir_builder vs_b;
44	nir_builder fs_b;
45
46	nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
47	nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
48
49	vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
50	fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
51
52	const struct glsl_type *position_type = glsl_vec4_type();
53	const struct glsl_type *color_type = glsl_vec4_type();
54
55	nir_variable *vs_out_pos =
56		nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
57				    "gl_Position");
58	vs_out_pos->data.location = VARYING_SLOT_POS;
59
60	nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(fs_b.shader, nir_intrinsic_load_push_constant);
61	nir_intrinsic_set_base(in_color_load, 0);
62	nir_intrinsic_set_range(in_color_load, 16);
63	in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&fs_b, 0));
64	in_color_load->num_components = 4;
65	nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 4, 32, "clear color");
66	nir_builder_instr_insert(&fs_b, &in_color_load->instr);
67
68	nir_variable *fs_out_color =
69		nir_variable_create(fs_b.shader, nir_var_shader_out, color_type,
70				    "f_color");
71	fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
72
73	nir_store_var(&fs_b, fs_out_color, &in_color_load->dest.ssa, 0xf);
74
75	nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);
76	nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
77
78	const struct glsl_type *layer_type = glsl_int_type();
79	nir_variable *vs_out_layer =
80		nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
81				    "v_layer");
82	vs_out_layer->data.location = VARYING_SLOT_LAYER;
83	vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
84	nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
85	nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
86
87	nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
88	nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
89
90	*out_vs = vs_b.shader;
91	*out_fs = fs_b.shader;
92}
93
94static VkResult
95create_pipeline(struct radv_device *device,
96		struct radv_render_pass *render_pass,
97		uint32_t samples,
98                struct nir_shader *vs_nir,
99                struct nir_shader *fs_nir,
100                const VkPipelineVertexInputStateCreateInfo *vi_state,
101                const VkPipelineDepthStencilStateCreateInfo *ds_state,
102                const VkPipelineColorBlendStateCreateInfo *cb_state,
103		const VkPipelineLayout layout,
104		const struct radv_graphics_pipeline_create_info *extra,
105                const VkAllocationCallbacks *alloc,
106		VkPipeline *pipeline)
107{
108	VkDevice device_h = radv_device_to_handle(device);
109	VkResult result;
110
111	struct radv_shader_module vs_m = { .nir = vs_nir };
112	struct radv_shader_module fs_m = { .nir = fs_nir };
113
114	result = radv_graphics_pipeline_create(device_h,
115					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
116					       &(VkGraphicsPipelineCreateInfo) {
117						       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
118							       .stageCount = fs_nir ? 2 : 1,
119							       .pStages = (VkPipelineShaderStageCreateInfo[]) {
120							       {
121								       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
122								       .stage = VK_SHADER_STAGE_VERTEX_BIT,
123								       .module = radv_shader_module_to_handle(&vs_m),
124								       .pName = "main",
125							       },
126							       {
127								       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
128								       .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
129								       .module = radv_shader_module_to_handle(&fs_m),
130								       .pName = "main",
131							       },
132						       },
133							       .pVertexInputState = vi_state,
134									.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
135							       .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
136							       .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
137							       .primitiveRestartEnable = false,
138						       },
139									.pViewportState = &(VkPipelineViewportStateCreateInfo) {
140							       .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
141							       .viewportCount = 1,
142							       .scissorCount = 1,
143						       },
144										 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
145							       .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
146							       .rasterizerDiscardEnable = false,
147							       .polygonMode = VK_POLYGON_MODE_FILL,
148							       .cullMode = VK_CULL_MODE_NONE,
149							       .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
150							       .depthBiasEnable = false,
151						       },
152											  .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
153							       .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
154							       .rasterizationSamples = samples,
155							       .sampleShadingEnable = false,
156							       .pSampleMask = NULL,
157							       .alphaToCoverageEnable = false,
158							       .alphaToOneEnable = false,
159						       },
160												   .pDepthStencilState = ds_state,
161													    .pColorBlendState = cb_state,
162													    .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
163							       /* The meta clear pipeline declares all state as dynamic.
164								* As a consequence, vkCmdBindPipeline writes no dynamic state
165								* to the cmd buffer. Therefore, at the end of the meta clear,
166								* we need only restore dynamic state was vkCmdSet.
167								*/
168							       .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
169							       .dynamicStateCount = 8,
170							       .pDynamicStates = (VkDynamicState[]) {
171								       /* Everything except stencil write mask */
172								       VK_DYNAMIC_STATE_VIEWPORT,
173								       VK_DYNAMIC_STATE_SCISSOR,
174								       VK_DYNAMIC_STATE_LINE_WIDTH,
175								       VK_DYNAMIC_STATE_DEPTH_BIAS,
176								       VK_DYNAMIC_STATE_BLEND_CONSTANTS,
177								       VK_DYNAMIC_STATE_DEPTH_BOUNDS,
178								       VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
179								       VK_DYNAMIC_STATE_STENCIL_REFERENCE,
180							       },
181						       },
182						    .layout = layout,
183						    .flags = 0,
184						    .renderPass = radv_render_pass_to_handle(render_pass),
185						    .subpass = 0,
186						},
187					       extra,
188					       alloc,
189					       pipeline);
190
191	ralloc_free(vs_nir);
192	ralloc_free(fs_nir);
193
194	return result;
195}
196
197static VkResult
198create_color_renderpass(struct radv_device *device,
199			VkFormat vk_format,
200			uint32_t samples,
201			VkRenderPass *pass)
202{
203	mtx_lock(&device->meta_state.mtx);
204	if (*pass) {
205		mtx_unlock (&device->meta_state.mtx);
206		return VK_SUCCESS;
207	}
208
209	VkResult result = radv_CreateRenderPass(radv_device_to_handle(device),
210				       &(VkRenderPassCreateInfo) {
211					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
212						       .attachmentCount = 1,
213						       .pAttachments = &(VkAttachmentDescription) {
214						       .format = vk_format,
215						       .samples = samples,
216						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
217						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
218						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
219						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
220					       },
221						       .subpassCount = 1,
222								.pSubpasses = &(VkSubpassDescription) {
223						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
224						       .inputAttachmentCount = 0,
225						       .colorAttachmentCount = 1,
226						       .pColorAttachments = &(VkAttachmentReference) {
227							       .attachment = 0,
228							       .layout = VK_IMAGE_LAYOUT_GENERAL,
229						       },
230						       .pResolveAttachments = NULL,
231						       .pDepthStencilAttachment = &(VkAttachmentReference) {
232							       .attachment = VK_ATTACHMENT_UNUSED,
233							       .layout = VK_IMAGE_LAYOUT_GENERAL,
234						       },
235						       .preserveAttachmentCount = 0,
236						       .pPreserveAttachments = NULL,
237					       },
238								.dependencyCount = 0,
239									 }, &device->meta_state.alloc, pass);
240	mtx_unlock(&device->meta_state.mtx);
241	return result;
242}
243
244static VkResult
245create_color_pipeline(struct radv_device *device,
246		      uint32_t samples,
247                      uint32_t frag_output,
248		      VkPipeline *pipeline,
249		      VkRenderPass pass)
250{
251	struct nir_shader *vs_nir;
252	struct nir_shader *fs_nir;
253	VkResult result;
254
255	mtx_lock(&device->meta_state.mtx);
256	if (*pipeline) {
257		mtx_unlock(&device->meta_state.mtx);
258		return VK_SUCCESS;
259	}
260
261	build_color_shaders(&vs_nir, &fs_nir, frag_output);
262
263	const VkPipelineVertexInputStateCreateInfo vi_state = {
264		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
265		.vertexBindingDescriptionCount = 0,
266		.vertexAttributeDescriptionCount = 0,
267	};
268
269	const VkPipelineDepthStencilStateCreateInfo ds_state = {
270		.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
271		.depthTestEnable = false,
272		.depthWriteEnable = false,
273		.depthBoundsTestEnable = false,
274		.stencilTestEnable = false,
275	};
276
277	VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = { 0 };
278	blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState) {
279		.blendEnable = false,
280		.colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
281		VK_COLOR_COMPONENT_R_BIT |
282		VK_COLOR_COMPONENT_G_BIT |
283		VK_COLOR_COMPONENT_B_BIT,
284	};
285
286	const VkPipelineColorBlendStateCreateInfo cb_state = {
287		.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
288		.logicOpEnable = false,
289		.attachmentCount = MAX_RTS,
290		.pAttachments = blend_attachment_state
291	};
292
293
294	struct radv_graphics_pipeline_create_info extra = {
295		.use_rectlist = true,
296	};
297	result = create_pipeline(device, radv_render_pass_from_handle(pass),
298				 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
299				 device->meta_state.clear_color_p_layout,
300				 &extra, &device->meta_state.alloc, pipeline);
301
302	mtx_unlock(&device->meta_state.mtx);
303	return result;
304}
305
306static void
307finish_meta_clear_htile_mask_state(struct radv_device *device)
308{
309	struct radv_meta_state *state = &device->meta_state;
310
311	radv_DestroyPipeline(radv_device_to_handle(device),
312			     state->clear_htile_mask_pipeline,
313			     &state->alloc);
314	radv_DestroyPipelineLayout(radv_device_to_handle(device),
315				   state->clear_htile_mask_p_layout,
316				   &state->alloc);
317	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
318					state->clear_htile_mask_ds_layout,
319					&state->alloc);
320}
321
322void
323radv_device_finish_meta_clear_state(struct radv_device *device)
324{
325	struct radv_meta_state *state = &device->meta_state;
326
327	for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
328		for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
329			radv_DestroyPipeline(radv_device_to_handle(device),
330					     state->clear[i].color_pipelines[j],
331					     &state->alloc);
332			radv_DestroyRenderPass(radv_device_to_handle(device),
333					       state->clear[i].render_pass[j],
334					       &state->alloc);
335		}
336
337		for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
338			radv_DestroyPipeline(radv_device_to_handle(device),
339					     state->clear[i].depth_only_pipeline[j],
340					     &state->alloc);
341			radv_DestroyPipeline(radv_device_to_handle(device),
342					     state->clear[i].stencil_only_pipeline[j],
343					     &state->alloc);
344			radv_DestroyPipeline(radv_device_to_handle(device),
345					     state->clear[i].depthstencil_pipeline[j],
346					     &state->alloc);
347		}
348		radv_DestroyRenderPass(radv_device_to_handle(device),
349				      state->clear[i].depthstencil_rp,
350				      &state->alloc);
351	}
352	radv_DestroyPipelineLayout(radv_device_to_handle(device),
353				   state->clear_color_p_layout,
354				   &state->alloc);
355	radv_DestroyPipelineLayout(radv_device_to_handle(device),
356				   state->clear_depth_p_layout,
357				   &state->alloc);
358
359	finish_meta_clear_htile_mask_state(device);
360}
361
362static void
363emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
364                 const VkClearAttachment *clear_att,
365                 const VkClearRect *clear_rect,
366                 uint32_t view_mask)
367{
368	struct radv_device *device = cmd_buffer->device;
369	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
370	const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
371	const uint32_t subpass_att = clear_att->colorAttachment;
372	const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
373	const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
374	uint32_t samples, samples_log2;
375	VkFormat format;
376	unsigned fs_key;
377	VkClearColorValue clear_value = clear_att->clearValue.color;
378	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
379	VkPipeline pipeline;
380
381	/* When a framebuffer is bound to the current command buffer, get the
382	 * number of samples from it. Otherwise, get the number of samples from
383	 * the render pass because it's likely a secondary command buffer.
384	 */
385	if (iview) {
386		samples = iview->image->info.samples;
387		format = iview->vk_format;
388	} else {
389		samples = cmd_buffer->state.pass->attachments[pass_att].samples;
390		format = cmd_buffer->state.pass->attachments[pass_att].format;
391	}
392
393	samples_log2 = ffs(samples) - 1;
394	fs_key = radv_format_meta_fs_key(format);
395
396	if (fs_key == -1) {
397		radv_finishme("color clears incomplete");
398		return;
399	}
400
401	if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) {
402		VkResult ret = create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key],
403		                                       samples,
404		                                       &device->meta_state.clear[samples_log2].render_pass[fs_key]);
405		if (ret != VK_SUCCESS) {
406			cmd_buffer->record_result = ret;
407			return;
408		}
409	}
410
411	if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) {
412		VkResult ret = create_color_pipeline(device, samples, 0,
413		                                     &device->meta_state.clear[samples_log2].color_pipelines[fs_key],
414		                                     device->meta_state.clear[samples_log2].render_pass[fs_key]);
415		if (ret != VK_SUCCESS) {
416			cmd_buffer->record_result = ret;
417			return;
418		}
419	}
420
421	pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];
422	if (!pipeline) {
423		radv_finishme("color clears incomplete");
424		return;
425	}
426	assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear));
427	assert(pipeline);
428	assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
429	assert(clear_att->colorAttachment < subpass->color_count);
430
431	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
432			      device->meta_state.clear_color_p_layout,
433			      VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
434			      &clear_value);
435
436	struct radv_subpass clear_subpass = {
437		.color_count = 1,
438		.color_attachments = (struct radv_subpass_attachment[]) {
439			subpass->color_attachments[clear_att->colorAttachment]
440		},
441		.depth_stencil_attachment = NULL,
442	};
443
444	radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
445
446	radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
447			     pipeline);
448
449	radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
450			.x = clear_rect->rect.offset.x,
451			.y = clear_rect->rect.offset.y,
452			.width = clear_rect->rect.extent.width,
453			.height = clear_rect->rect.extent.height,
454			.minDepth = 0.0f,
455			.maxDepth = 1.0f
456		});
457
458	radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
459
460	if (view_mask) {
461		unsigned i;
462		for_each_bit(i, view_mask)
463			radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
464	} else {
465		radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
466	}
467
468	radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
469}
470
471
472static void
473build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs)
474{
475	nir_builder vs_b, fs_b;
476
477	nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
478	nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
479
480	vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
481	fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
482	const struct glsl_type *position_out_type = glsl_vec4_type();
483
484	nir_variable *vs_out_pos =
485		nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type,
486				    "gl_Position");
487	vs_out_pos->data.location = VARYING_SLOT_POS;
488
489	nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(vs_b.shader, nir_intrinsic_load_push_constant);
490	nir_intrinsic_set_base(in_color_load, 0);
491	nir_intrinsic_set_range(in_color_load, 4);
492	in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&vs_b, 0));
493	in_color_load->num_components = 1;
494	nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 1, 32, "depth value");
495	nir_builder_instr_insert(&vs_b, &in_color_load->instr);
496
497	nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, &in_color_load->dest.ssa);
498	nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
499
500	const struct glsl_type *layer_type = glsl_int_type();
501	nir_variable *vs_out_layer =
502		nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
503				    "v_layer");
504	vs_out_layer->data.location = VARYING_SLOT_LAYER;
505	vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
506	nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
507	nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
508
509	nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
510	nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
511
512	*out_vs = vs_b.shader;
513	*out_fs = fs_b.shader;
514}
515
516static VkResult
517create_depthstencil_renderpass(struct radv_device *device,
518			       uint32_t samples,
519			       VkRenderPass *render_pass)
520{
521	mtx_lock(&device->meta_state.mtx);
522	if (*render_pass) {
523		mtx_unlock(&device->meta_state.mtx);
524		return VK_SUCCESS;
525	}
526
527	VkResult result = radv_CreateRenderPass(radv_device_to_handle(device),
528				       &(VkRenderPassCreateInfo) {
529					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
530						       .attachmentCount = 1,
531						       .pAttachments = &(VkAttachmentDescription) {
532						       .format = VK_FORMAT_D32_SFLOAT_S8_UINT,
533						       .samples = samples,
534						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
535						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
536						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
537						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
538					       },
539						       .subpassCount = 1,
540								.pSubpasses = &(VkSubpassDescription) {
541						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
542						       .inputAttachmentCount = 0,
543						       .colorAttachmentCount = 0,
544						       .pColorAttachments = NULL,
545						       .pResolveAttachments = NULL,
546						       .pDepthStencilAttachment = &(VkAttachmentReference) {
547							       .attachment = 0,
548							       .layout = VK_IMAGE_LAYOUT_GENERAL,
549						       },
550						       .preserveAttachmentCount = 0,
551						       .pPreserveAttachments = NULL,
552					       },
553								.dependencyCount = 0,
554									 }, &device->meta_state.alloc, render_pass);
555	mtx_unlock(&device->meta_state.mtx);
556	return result;
557}
558
559static VkResult
560create_depthstencil_pipeline(struct radv_device *device,
561                             VkImageAspectFlags aspects,
562			     uint32_t samples,
563			     int index,
564			     VkPipeline *pipeline,
565			     VkRenderPass render_pass)
566{
567	struct nir_shader *vs_nir, *fs_nir;
568	VkResult result;
569
570	mtx_lock(&device->meta_state.mtx);
571	if (*pipeline) {
572		mtx_unlock(&device->meta_state.mtx);
573		return VK_SUCCESS;
574	}
575
576	build_depthstencil_shader(&vs_nir, &fs_nir);
577
578	const VkPipelineVertexInputStateCreateInfo vi_state = {
579		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
580		.vertexBindingDescriptionCount = 0,
581		.vertexAttributeDescriptionCount = 0,
582	};
583
584	const VkPipelineDepthStencilStateCreateInfo ds_state = {
585		.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
586		.depthTestEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
587		.depthCompareOp = VK_COMPARE_OP_ALWAYS,
588		.depthWriteEnable = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
589		.depthBoundsTestEnable = false,
590		.stencilTestEnable = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
591		.front = {
592			.passOp = VK_STENCIL_OP_REPLACE,
593			.compareOp = VK_COMPARE_OP_ALWAYS,
594			.writeMask = UINT32_MAX,
595			.reference = 0, /* dynamic */
596		},
597		.back = { 0 /* dont care */ },
598	};
599
600	const VkPipelineColorBlendStateCreateInfo cb_state = {
601		.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
602		.logicOpEnable = false,
603		.attachmentCount = 0,
604		.pAttachments = NULL,
605	};
606
607	struct radv_graphics_pipeline_create_info extra = {
608		.use_rectlist = true,
609	};
610
611	if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
612		extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
613		extra.db_depth_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
614	}
615	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
616		extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
617		extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
618	}
619	result = create_pipeline(device, radv_render_pass_from_handle(render_pass),
620				 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
621				 device->meta_state.clear_depth_p_layout,
622				 &extra, &device->meta_state.alloc, pipeline);
623
624	mtx_unlock(&device->meta_state.mtx);
625	return result;
626}
627
628static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
629				      const struct radv_image_view *iview,
630				      VkImageAspectFlags aspects,
631				      VkImageLayout layout,
632				      const VkClearRect *clear_rect,
633				      VkClearDepthStencilValue clear_value)
634{
635	if (!iview)
636		return false;
637
638	uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
639	                                                   cmd_buffer->queue_family_index,
640	                                                   cmd_buffer->queue_family_index);
641	if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
642	    clear_rect->rect.extent.width != iview->extent.width ||
643	    clear_rect->rect.extent.height != iview->extent.height)
644		return false;
645	if (radv_image_is_tc_compat_htile(iview->image) &&
646	    (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&
647	      clear_value.depth != 1.0) ||
648	     ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))
649		return false;
650	if (radv_image_has_htile(iview->image) &&
651	    iview->base_mip == 0 &&
652	    iview->base_layer == 0 &&
653	    iview->layer_count == iview->image->info.array_size &&
654	    radv_layout_is_htile_compressed(iview->image, layout, queue_mask) &&
655	    radv_image_extent_compare(iview->image, &iview->extent))
656		return true;
657	return false;
658}
659
660static VkPipeline
661pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
662			   struct radv_meta_state *meta_state,
663			   const struct radv_image_view *iview,
664			   int samples_log2,
665			   VkImageAspectFlags aspects,
666			   VkImageLayout layout,
667			   const VkClearRect *clear_rect,
668			   VkClearDepthStencilValue clear_value)
669{
670	bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, clear_rect, clear_value);
671	int index = DEPTH_CLEAR_SLOW;
672	VkPipeline *pipeline;
673
674	if (fast) {
675		/* we don't know the previous clear values, so we always have
676		 * the NO_EXPCLEAR path */
677		index = DEPTH_CLEAR_FAST_NO_EXPCLEAR;
678	}
679
680	switch (aspects) {
681	case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
682		pipeline = &meta_state->clear[samples_log2].depthstencil_pipeline[index];
683		break;
684	case VK_IMAGE_ASPECT_DEPTH_BIT:
685		pipeline = &meta_state->clear[samples_log2].depth_only_pipeline[index];
686		break;
687	case VK_IMAGE_ASPECT_STENCIL_BIT:
688		pipeline = &meta_state->clear[samples_log2].stencil_only_pipeline[index];
689		break;
690	default:
691		unreachable("expected depth or stencil aspect");
692	}
693
694	if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) {
695		VkResult ret = create_depthstencil_renderpass(cmd_buffer->device, 1u << samples_log2,
696		                                              &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
697		if (ret != VK_SUCCESS) {
698			cmd_buffer->record_result = ret;
699			return VK_NULL_HANDLE;
700		}
701	}
702
703	if (*pipeline == VK_NULL_HANDLE) {
704		VkResult ret = create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index,
705		                                            pipeline, cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
706		if (ret != VK_SUCCESS) {
707			cmd_buffer->record_result = ret;
708			return VK_NULL_HANDLE;
709		}
710	}
711	return *pipeline;
712}
713
714static void
715emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
716                        const VkClearAttachment *clear_att,
717                        const VkClearRect *clear_rect,
718                        uint32_t view_mask)
719{
720	struct radv_device *device = cmd_buffer->device;
721	struct radv_meta_state *meta_state = &device->meta_state;
722	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
723	const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
724	const uint32_t pass_att = subpass->depth_stencil_attachment->attachment;
725	VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
726	VkImageAspectFlags aspects = clear_att->aspectMask;
727	const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
728	uint32_t samples, samples_log2;
729	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
730
731	/* When a framebuffer is bound to the current command buffer, get the
732	 * number of samples from it. Otherwise, get the number of samples from
733	 * the render pass because it's likely a secondary command buffer.
734	 */
735	if (iview) {
736		samples = iview->image->info.samples;
737	} else {
738		samples = cmd_buffer->state.pass->attachments[pass_att].samples;
739	}
740
741	samples_log2 = ffs(samples) - 1;
742
743	assert(pass_att != VK_ATTACHMENT_UNUSED);
744
745	if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
746		clear_value.depth = 1.0f;
747
748	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
749			      device->meta_state.clear_depth_p_layout,
750			      VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
751			      &clear_value.depth);
752
753	uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;
754	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
755		radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
756						  clear_value.stencil);
757	}
758
759	VkPipeline pipeline = pick_depthstencil_pipeline(cmd_buffer,
760							 meta_state,
761							 iview,
762							 samples_log2,
763							 aspects,
764							 subpass->depth_stencil_attachment->layout,
765							 clear_rect,
766							 clear_value);
767	if (!pipeline)
768		return;
769
770	radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
771			     pipeline);
772
773	if (depth_view_can_fast_clear(cmd_buffer, iview, aspects,
774	                              subpass->depth_stencil_attachment->layout,
775	                              clear_rect, clear_value))
776		radv_update_ds_clear_metadata(cmd_buffer, iview->image,
777					      clear_value, aspects);
778
779	radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
780			.x = clear_rect->rect.offset.x,
781			.y = clear_rect->rect.offset.y,
782			.width = clear_rect->rect.extent.width,
783			.height = clear_rect->rect.extent.height,
784			.minDepth = 0.0f,
785			.maxDepth = 1.0f
786		});
787
788	radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
789
790	if (view_mask) {
791		unsigned i;
792		for_each_bit(i, view_mask)
793			radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
794	} else {
795		radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
796	}
797
798	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
799		radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
800						  prev_reference);
801	}
802}
803
804static uint32_t
805clear_htile_mask(struct radv_cmd_buffer *cmd_buffer,
806		 struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size,
807		 uint32_t htile_value, uint32_t htile_mask)
808{
809	struct radv_device *device = cmd_buffer->device;
810	struct radv_meta_state *state = &device->meta_state;
811	uint64_t block_count = round_up_u64(size, 1024);
812	struct radv_meta_saved_state saved_state;
813
814	radv_meta_save(&saved_state, cmd_buffer,
815		       RADV_META_SAVE_COMPUTE_PIPELINE |
816		       RADV_META_SAVE_CONSTANTS |
817		       RADV_META_SAVE_DESCRIPTORS);
818
819	struct radv_buffer dst_buffer = {
820		.bo = bo,
821		.offset = offset,
822		.size = size
823	};
824
825	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
826			     VK_PIPELINE_BIND_POINT_COMPUTE,
827			     state->clear_htile_mask_pipeline);
828
829	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
830			              state->clear_htile_mask_p_layout,
831				      0, /* set */
832				      1, /* descriptorWriteCount */
833				      (VkWriteDescriptorSet[]) {
834				              {
835				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
836				                      .dstBinding = 0,
837				                      .dstArrayElement = 0,
838				                      .descriptorCount = 1,
839				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
840				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
841				                              .buffer = radv_buffer_to_handle(&dst_buffer),
842				                              .offset = 0,
843				                              .range = size
844				                      }
845				              }
846				      });
847
848	const unsigned constants[2] = {
849		htile_value & htile_mask,
850		~htile_mask,
851	};
852
853	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
854			      state->clear_htile_mask_p_layout,
855			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 8,
856			      constants);
857
858	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
859
860	radv_meta_restore(&saved_state, cmd_buffer);
861
862	return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
863	       RADV_CMD_FLAG_INV_VMEM_L1 |
864	       RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
865}
866
867static uint32_t
868radv_get_htile_fast_clear_value(const struct radv_image *image,
869				VkClearDepthStencilValue value)
870{
871	uint32_t clear_value;
872
873	if (!image->planes[0].surface.has_stencil) {
874		clear_value = value.depth ? 0xfffffff0 : 0;
875	} else {
876		clear_value = value.depth ? 0xfffc0000 : 0;
877	}
878
879	return clear_value;
880}
881
882static uint32_t
883radv_get_htile_mask(const struct radv_image *image, VkImageAspectFlags aspects)
884{
885	uint32_t mask = 0;
886
887	if (!image->planes[0].surface.has_stencil) {
888		/* All the HTILE buffer is used when there is no stencil. */
889		mask = UINT32_MAX;
890	} else {
891		if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
892			mask |= 0xfffffc0f;
893		if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
894			mask |= 0x000003f0;
895	}
896
897	return mask;
898}
899
900static bool
901radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)
902{
903	return value.depth == 1.0f || value.depth == 0.0f;
904}
905
906static bool
907radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)
908{
909	return value.stencil == 0;
910}
911
912/**
913 * Determine if the given image can be fast cleared.
914 */
915static bool
916radv_image_can_fast_clear(struct radv_device *device,  struct radv_image *image)
917{
918	if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
919		return false;
920
921	if (vk_format_is_color(image->vk_format)) {
922		if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
923			return false;
924
925		/* RB+ doesn't work with CMASK fast clear on Stoney. */
926		if (!radv_image_has_dcc(image) &&
927		    device->physical_device->rad_info.family == CHIP_STONEY)
928			return false;
929	} else {
930		if (!radv_image_has_htile(image))
931			return false;
932	}
933
934	/* Do not fast clears 3D images. */
935	if (image->type == VK_IMAGE_TYPE_3D)
936		return false;
937
938	return true;
939}
940
941/**
942 * Determine if the given image view can be fast cleared.
943 */
944static bool
945radv_image_view_can_fast_clear(struct radv_device *device,
946			       const struct radv_image_view *iview)
947{
948	struct radv_image *image;
949
950	if (!iview)
951		return false;
952	image = iview->image;
953
954	/* Only fast clear if the image itself can be fast cleared. */
955	if (!radv_image_can_fast_clear(device, image))
956		return false;
957
958	/* Only fast clear if all layers are bound. */
959	if (iview->base_layer > 0 ||
960	    iview->layer_count != image->info.array_size)
961		return false;
962
963	/* Only fast clear if the view covers the whole image. */
964	if (!radv_image_extent_compare(image, &iview->extent))
965		return false;
966
967	return true;
968}
969
970static bool
971radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
972			  const struct radv_image_view *iview,
973			  VkImageLayout image_layout,
974			  VkImageAspectFlags aspects,
975			  const VkClearRect *clear_rect,
976			  const VkClearDepthStencilValue clear_value,
977			  uint32_t view_mask)
978{
979	if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview))
980		return false;
981
982	if (!radv_layout_is_htile_compressed(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index)))
983		return false;
984
985	if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
986	    clear_rect->rect.extent.width != iview->image->info.width ||
987	    clear_rect->rect.extent.height != iview->image->info.height)
988		return false;
989
990	if (view_mask && (iview->image->info.array_size >= 32 ||
991	                 (1u << iview->image->info.array_size) - 1u != view_mask))
992		return false;
993	if (!view_mask && clear_rect->baseArrayLayer != 0)
994		return false;
995	if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
996		return false;
997
998	if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 &&
999	    (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ||
1000	    ((vk_format_aspects(iview->image->vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1001	     !(aspects & VK_IMAGE_ASPECT_STENCIL_BIT))))
1002		return false;
1003
1004	if (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
1005	    !radv_is_fast_clear_depth_allowed(clear_value)) ||
1006	    ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
1007	     !radv_is_fast_clear_stencil_allowed(clear_value)))
1008		return false;
1009
1010	return true;
1011}
1012
1013static void
1014radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
1015		      const struct radv_image_view *iview,
1016		      const VkClearAttachment *clear_att,
1017		      enum radv_cmd_flush_bits *pre_flush,
1018		      enum radv_cmd_flush_bits *post_flush)
1019{
1020	VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
1021	VkImageAspectFlags aspects = clear_att->aspectMask;
1022	uint32_t clear_word, flush_bits;
1023	uint32_t htile_mask;
1024
1025	clear_word = radv_get_htile_fast_clear_value(iview->image, clear_value);
1026	htile_mask = radv_get_htile_mask(iview->image, aspects);
1027
1028	if (pre_flush) {
1029		cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_DB |
1030						 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) & ~ *pre_flush;
1031		*pre_flush |= cmd_buffer->state.flush_bits;
1032	}
1033
1034	if (htile_mask == UINT_MAX) {
1035		/* Clear the whole HTILE buffer. */
1036		flush_bits = radv_fill_buffer(cmd_buffer, iview->image->bo,
1037					      iview->image->offset + iview->image->htile_offset,
1038					      iview->image->planes[0].surface.htile_size, clear_word);
1039	} else {
1040		/* Only clear depth or stencil bytes in the HTILE buffer. */
1041		assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9);
1042		flush_bits = clear_htile_mask(cmd_buffer, iview->image->bo,
1043					      iview->image->offset + iview->image->htile_offset,
1044					      iview->image->planes[0].surface.htile_size, clear_word,
1045					      htile_mask);
1046	}
1047
1048	radv_update_ds_clear_metadata(cmd_buffer, iview->image, clear_value, aspects);
1049	if (post_flush) {
1050		*post_flush |= flush_bits;
1051	}
1052}
1053
1054static nir_shader *
1055build_clear_htile_mask_shader()
1056{
1057	nir_builder b;
1058
1059	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
1060	b.shader->info.name = ralloc_strdup(b.shader, "meta_clear_htile_mask");
1061	b.shader->info.cs.local_size[0] = 64;
1062	b.shader->info.cs.local_size[1] = 1;
1063	b.shader->info.cs.local_size[2] = 1;
1064
1065	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
1066	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
1067	nir_ssa_def *block_size = nir_imm_ivec4(&b,
1068						b.shader->info.cs.local_size[0],
1069						b.shader->info.cs.local_size[1],
1070						b.shader->info.cs.local_size[2], 0);
1071
1072	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
1073
1074	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
1075	offset = nir_channel(&b, offset, 0);
1076
1077	nir_intrinsic_instr *buf =
1078		nir_intrinsic_instr_create(b.shader,
1079					   nir_intrinsic_vulkan_resource_index);
1080
1081	buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
1082	buf->num_components = 1;
1083	nir_intrinsic_set_desc_set(buf, 0);
1084	nir_intrinsic_set_binding(buf, 0);
1085	nir_ssa_dest_init(&buf->instr, &buf->dest, buf->num_components, 32, NULL);
1086	nir_builder_instr_insert(&b, &buf->instr);
1087
1088	nir_intrinsic_instr *constants =
1089		nir_intrinsic_instr_create(b.shader,
1090					   nir_intrinsic_load_push_constant);
1091	nir_intrinsic_set_base(constants, 0);
1092	nir_intrinsic_set_range(constants, 8);
1093	constants->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
1094	constants->num_components = 2;
1095	nir_ssa_dest_init(&constants->instr, &constants->dest, 2, 32, "constants");
1096	nir_builder_instr_insert(&b, &constants->instr);
1097
1098	nir_intrinsic_instr *load =
1099		nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
1100	load->src[0] = nir_src_for_ssa(&buf->dest.ssa);
1101	load->src[1] = nir_src_for_ssa(offset);
1102	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
1103	load->num_components = 4;
1104	nir_builder_instr_insert(&b, &load->instr);
1105
1106	/* data = (data & ~htile_mask) | (htile_value & htile_mask) */
1107	nir_ssa_def *data =
1108		nir_iand(&b, &load->dest.ssa,
1109			 nir_channel(&b, &constants->dest.ssa, 1));
1110	data = nir_ior(&b, data, nir_channel(&b, &constants->dest.ssa, 0));
1111
1112	nir_intrinsic_instr *store =
1113		nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
1114	store->src[0] = nir_src_for_ssa(data);
1115	store->src[1] = nir_src_for_ssa(&buf->dest.ssa);
1116	store->src[2] = nir_src_for_ssa(offset);
1117	nir_intrinsic_set_write_mask(store, 0xf);
1118	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
1119	store->num_components = 4;
1120	nir_builder_instr_insert(&b, &store->instr);
1121
1122	return b.shader;
1123}
1124
1125static VkResult
1126init_meta_clear_htile_mask_state(struct radv_device *device)
1127{
1128	struct radv_meta_state *state = &device->meta_state;
1129	struct radv_shader_module cs = { .nir = NULL };
1130	VkResult result;
1131
1132	cs.nir = build_clear_htile_mask_shader();
1133
1134	VkDescriptorSetLayoutCreateInfo ds_layout_info = {
1135		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1136		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1137		.bindingCount = 1,
1138		.pBindings = (VkDescriptorSetLayoutBinding[]) {
1139			{
1140				.binding = 0,
1141				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1142				.descriptorCount = 1,
1143				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1144				.pImmutableSamplers = NULL
1145			},
1146		}
1147	};
1148
1149	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1150						&ds_layout_info, &state->alloc,
1151						&state->clear_htile_mask_ds_layout);
1152	if (result != VK_SUCCESS)
1153		goto fail;
1154
1155	VkPipelineLayoutCreateInfo p_layout_info = {
1156		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1157		.setLayoutCount = 1,
1158		.pSetLayouts = &state->clear_htile_mask_ds_layout,
1159		.pushConstantRangeCount = 1,
1160		.pPushConstantRanges = &(VkPushConstantRange){
1161			VK_SHADER_STAGE_COMPUTE_BIT, 0, 8,
1162		},
1163	};
1164
1165	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1166					  &p_layout_info, &state->alloc,
1167					  &state->clear_htile_mask_p_layout);
1168	if (result != VK_SUCCESS)
1169		goto fail;
1170
1171	VkPipelineShaderStageCreateInfo shader_stage = {
1172		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1173		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
1174		.module = radv_shader_module_to_handle(&cs),
1175		.pName = "main",
1176		.pSpecializationInfo = NULL,
1177	};
1178
1179	VkComputePipelineCreateInfo pipeline_info = {
1180		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1181		.stage = shader_stage,
1182		.flags = 0,
1183		.layout = state->clear_htile_mask_p_layout,
1184	};
1185
1186	result = radv_CreateComputePipelines(radv_device_to_handle(device),
1187					     radv_pipeline_cache_to_handle(&state->cache),
1188					     1, &pipeline_info, NULL,
1189					     &state->clear_htile_mask_pipeline);
1190
1191	ralloc_free(cs.nir);
1192	return result;
1193fail:
1194	ralloc_free(cs.nir);
1195	return result;
1196}
1197
1198VkResult
1199radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
1200{
1201	VkResult res;
1202	struct radv_meta_state *state = &device->meta_state;
1203
1204	VkPipelineLayoutCreateInfo pl_color_create_info = {
1205		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1206		.setLayoutCount = 0,
1207		.pushConstantRangeCount = 1,
1208		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
1209	};
1210
1211	res = radv_CreatePipelineLayout(radv_device_to_handle(device),
1212					&pl_color_create_info,
1213					&device->meta_state.alloc,
1214					&device->meta_state.clear_color_p_layout);
1215	if (res != VK_SUCCESS)
1216		goto fail;
1217
1218	VkPipelineLayoutCreateInfo pl_depth_create_info = {
1219		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1220		.setLayoutCount = 0,
1221		.pushConstantRangeCount = 1,
1222		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
1223	};
1224
1225	res = radv_CreatePipelineLayout(radv_device_to_handle(device),
1226					&pl_depth_create_info,
1227					&device->meta_state.alloc,
1228					&device->meta_state.clear_depth_p_layout);
1229	if (res != VK_SUCCESS)
1230		goto fail;
1231
1232	res = init_meta_clear_htile_mask_state(device);
1233	if (res != VK_SUCCESS)
1234		goto fail;
1235
1236	if (on_demand)
1237		return VK_SUCCESS;
1238
1239	for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
1240		uint32_t samples = 1 << i;
1241		for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
1242			VkFormat format = radv_fs_key_format_exemplars[j];
1243			unsigned fs_key = radv_format_meta_fs_key(format);
1244			assert(!state->clear[i].color_pipelines[fs_key]);
1245
1246			res = create_color_renderpass(device, format, samples,
1247						      &state->clear[i].render_pass[fs_key]);
1248			if (res != VK_SUCCESS)
1249				goto fail;
1250
1251			res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key],
1252						    state->clear[i].render_pass[fs_key]);
1253			if (res != VK_SUCCESS)
1254				goto fail;
1255
1256		}
1257
1258		res = create_depthstencil_renderpass(device,
1259						     samples,
1260						     &state->clear[i].depthstencil_rp);
1261		if (res != VK_SUCCESS)
1262			goto fail;
1263
1264		for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
1265			res = create_depthstencil_pipeline(device,
1266							   VK_IMAGE_ASPECT_DEPTH_BIT,
1267							   samples,
1268							   j,
1269							   &state->clear[i].depth_only_pipeline[j],
1270							   state->clear[i].depthstencil_rp);
1271			if (res != VK_SUCCESS)
1272				goto fail;
1273
1274			res = create_depthstencil_pipeline(device,
1275							   VK_IMAGE_ASPECT_STENCIL_BIT,
1276							   samples,
1277							   j,
1278							   &state->clear[i].stencil_only_pipeline[j],
1279							   state->clear[i].depthstencil_rp);
1280			if (res != VK_SUCCESS)
1281				goto fail;
1282
1283			res = create_depthstencil_pipeline(device,
1284							   VK_IMAGE_ASPECT_DEPTH_BIT |
1285							   VK_IMAGE_ASPECT_STENCIL_BIT,
1286							   samples,
1287							   j,
1288							   &state->clear[i].depthstencil_pipeline[j],
1289							   state->clear[i].depthstencil_rp);
1290			if (res != VK_SUCCESS)
1291				goto fail;
1292		}
1293	}
1294	return VK_SUCCESS;
1295
1296fail:
1297	radv_device_finish_meta_clear_state(device);
1298	return res;
1299}
1300
1301static uint32_t
1302radv_get_cmask_fast_clear_value(const struct radv_image *image)
1303{
1304	uint32_t value = 0; /* Default value when no DCC. */
1305
1306	/* The fast-clear value is different for images that have both DCC and
1307	 * CMASK metadata.
1308	 */
1309	if (radv_image_has_dcc(image)) {
1310		/* DCC fast clear with MSAA should clear CMASK to 0xC. */
1311		return image->info.samples > 1 ? 0xcccccccc : 0xffffffff;
1312	}
1313
1314	return value;
1315}
1316
1317uint32_t
1318radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer,
1319		 struct radv_image *image, uint32_t value)
1320{
1321	return radv_fill_buffer(cmd_buffer, image->bo,
1322				image->offset + image->cmask.offset,
1323				image->cmask.size, value);
1324}
1325
1326
1327uint32_t
1328radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
1329		 struct radv_image *image, uint32_t value)
1330{
1331	return radv_fill_buffer(cmd_buffer, image->bo,
1332				image->offset + image->fmask.offset,
1333				image->fmask.size, value);
1334}
1335
1336uint32_t
1337radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
1338	       struct radv_image *image, uint32_t value)
1339{
1340	/* Mark the image as being compressed. */
1341	radv_update_dcc_metadata(cmd_buffer, image, true);
1342
1343	return radv_fill_buffer(cmd_buffer, image->bo,
1344				image->offset + image->dcc_offset,
1345				image->planes[0].surface.dcc_size, value);
1346}
1347
1348static void vi_get_fast_clear_parameters(VkFormat format,
1349					 const VkClearColorValue *clear_value,
1350					 uint32_t* reset_value,
1351					 bool *can_avoid_fast_clear_elim)
1352{
1353	bool values[4] = {};
1354	int extra_channel;
1355	bool main_value = false;
1356	bool extra_value = false;
1357	int i;
1358	*can_avoid_fast_clear_elim = false;
1359
1360	*reset_value = 0x20202020U;
1361
1362	const struct vk_format_description *desc = vk_format_description(format);
1363	if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
1364	    format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
1365	    format == VK_FORMAT_B5G6R5_UNORM_PACK16)
1366		extra_channel = -1;
1367	else if (desc->layout == VK_FORMAT_LAYOUT_PLAIN) {
1368		if (radv_translate_colorswap(format, false) <= 1)
1369			extra_channel = desc->nr_channels - 1;
1370		else
1371			extra_channel = 0;
1372	} else
1373		return;
1374
1375	for (i = 0; i < 4; i++) {
1376		int index = desc->swizzle[i] - VK_SWIZZLE_X;
1377		if (desc->swizzle[i] < VK_SWIZZLE_X ||
1378		    desc->swizzle[i] > VK_SWIZZLE_W)
1379			continue;
1380
1381		if (desc->channel[i].pure_integer &&
1382		    desc->channel[i].type == VK_FORMAT_TYPE_SIGNED) {
1383			/* Use the maximum value for clamping the clear color. */
1384			int max = u_bit_consecutive(0, desc->channel[i].size - 1);
1385
1386			values[i] = clear_value->int32[i] != 0;
1387			if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
1388				return;
1389		} else if (desc->channel[i].pure_integer &&
1390			   desc->channel[i].type == VK_FORMAT_TYPE_UNSIGNED) {
1391			/* Use the maximum value for clamping the clear color. */
1392			unsigned max = u_bit_consecutive(0, desc->channel[i].size);
1393
1394			values[i] = clear_value->uint32[i] != 0U;
1395			if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
1396				return;
1397		} else {
1398			values[i] = clear_value->float32[i] != 0.0F;
1399			if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
1400				return;
1401		}
1402
1403		if (index == extra_channel)
1404			extra_value = values[i];
1405		else
1406			main_value = values[i];
1407	}
1408
1409	for (int i = 0; i < 4; ++i)
1410		if (values[i] != main_value &&
1411		    desc->swizzle[i] - VK_SWIZZLE_X != extra_channel &&
1412		    desc->swizzle[i] >= VK_SWIZZLE_X &&
1413		    desc->swizzle[i] <= VK_SWIZZLE_W)
1414			return;
1415
1416	*can_avoid_fast_clear_elim = true;
1417	if (main_value)
1418		*reset_value |= 0x80808080U;
1419
1420	if (extra_value)
1421		*reset_value |= 0x40404040U;
1422	return;
1423}
1424
1425static bool
1426radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
1427			  const struct radv_image_view *iview,
1428			  VkImageLayout image_layout,
1429			  const VkClearRect *clear_rect,
1430			  VkClearColorValue clear_value,
1431			  uint32_t view_mask)
1432{
1433	uint32_t clear_color[2];
1434
1435	if (!radv_image_view_can_fast_clear(cmd_buffer->device, iview))
1436		return false;
1437
1438	if (!radv_layout_can_fast_clear(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index)))
1439		return false;
1440
1441	if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
1442	    clear_rect->rect.extent.width != iview->image->info.width ||
1443	    clear_rect->rect.extent.height != iview->image->info.height)
1444		return false;
1445
1446	if (view_mask && (iview->image->info.array_size >= 32 ||
1447	                 (1u << iview->image->info.array_size) - 1u != view_mask))
1448		return false;
1449	if (!view_mask && clear_rect->baseArrayLayer != 0)
1450		return false;
1451	if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
1452		return false;
1453
1454	/* DCC */
1455	if (!radv_format_pack_clear_color(iview->vk_format,
1456					  clear_color, &clear_value))
1457		return false;
1458
1459	if (radv_image_has_dcc(iview->image)) {
1460		bool can_avoid_fast_clear_elim;
1461		uint32_t reset_value;
1462
1463		vi_get_fast_clear_parameters(iview->vk_format,
1464					     &clear_value, &reset_value,
1465					     &can_avoid_fast_clear_elim);
1466
1467		if (iview->image->info.samples > 1) {
1468			/* DCC fast clear with MSAA should clear CMASK. */
1469			/* FIXME: This doesn't work for now. There is a
1470			 * hardware bug with fast clears and DCC for MSAA
1471			 * textures. AMDVLK has a workaround but it doesn't
1472			 * seem to work here. Note that we might emit useless
1473			 * CB flushes but that shouldn't matter.
1474			 */
1475			if (!can_avoid_fast_clear_elim)
1476				return false;
1477		}
1478	}
1479
1480	return true;
1481}
1482
1483
1484static void
1485radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
1486		      const struct radv_image_view *iview,
1487		      const VkClearAttachment *clear_att,
1488		      uint32_t subpass_att,
1489		      enum radv_cmd_flush_bits *pre_flush,
1490		      enum radv_cmd_flush_bits *post_flush)
1491{
1492	VkClearColorValue clear_value = clear_att->clearValue.color;
1493	uint32_t clear_color[2], flush_bits = 0;
1494	uint32_t cmask_clear_value;
1495
1496	if (pre_flush) {
1497		cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
1498						 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) & ~ *pre_flush;
1499		*pre_flush |= cmd_buffer->state.flush_bits;
1500	}
1501
1502	/* DCC */
1503	radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value);
1504
1505	cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);
1506
1507	/* clear cmask buffer */
1508	if (radv_image_has_dcc(iview->image)) {
1509		uint32_t reset_value;
1510		bool can_avoid_fast_clear_elim;
1511		bool need_decompress_pass = false;
1512
1513		vi_get_fast_clear_parameters(iview->vk_format,
1514					     &clear_value, &reset_value,
1515					     &can_avoid_fast_clear_elim);
1516
1517		if (radv_image_has_cmask(iview->image)) {
1518			flush_bits = radv_clear_cmask(cmd_buffer, iview->image,
1519						      cmask_clear_value);
1520
1521			need_decompress_pass = true;
1522		}
1523
1524		if (!can_avoid_fast_clear_elim)
1525			need_decompress_pass = true;
1526
1527		flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, reset_value);
1528
1529		radv_update_fce_metadata(cmd_buffer, iview->image,
1530					 need_decompress_pass);
1531	} else {
1532		flush_bits = radv_clear_cmask(cmd_buffer, iview->image,
1533					      cmask_clear_value);
1534	}
1535
1536	if (post_flush) {
1537		*post_flush |= flush_bits;
1538	}
1539
1540	radv_update_color_clear_metadata(cmd_buffer, iview->image, subpass_att,
1541					 clear_color);
1542}
1543
1544/**
1545 * The parameters mean that same as those in vkCmdClearAttachments.
1546 */
1547static void
1548emit_clear(struct radv_cmd_buffer *cmd_buffer,
1549           const VkClearAttachment *clear_att,
1550           const VkClearRect *clear_rect,
1551           enum radv_cmd_flush_bits *pre_flush,
1552           enum radv_cmd_flush_bits *post_flush,
1553           uint32_t view_mask)
1554{
1555	const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
1556	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
1557	VkImageAspectFlags aspects = clear_att->aspectMask;
1558
1559	if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1560		const uint32_t subpass_att = clear_att->colorAttachment;
1561		assert(subpass_att < subpass->color_count);
1562		const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
1563		if (pass_att == VK_ATTACHMENT_UNUSED)
1564			return;
1565
1566		VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
1567		const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
1568		VkClearColorValue clear_value = clear_att->clearValue.color;
1569
1570		if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout,
1571					      clear_rect, clear_value, view_mask)) {
1572			radv_fast_clear_color(cmd_buffer, iview, clear_att,
1573					      subpass_att, pre_flush,
1574					      post_flush);
1575		} else {
1576			emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
1577		}
1578	} else {
1579		if (!subpass->depth_stencil_attachment)
1580			return;
1581
1582		const uint32_t pass_att = subpass->depth_stencil_attachment->attachment;
1583		if (pass_att == VK_ATTACHMENT_UNUSED)
1584			return;
1585
1586		VkImageLayout image_layout = subpass->depth_stencil_attachment->layout;
1587		const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
1588		VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
1589
1590		assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
1591				  VK_IMAGE_ASPECT_STENCIL_BIT));
1592
1593		if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout,
1594		                              aspects, clear_rect, clear_value,
1595		                              view_mask)) {
1596			radv_fast_clear_depth(cmd_buffer, iview, clear_att,
1597			                      pre_flush, post_flush);
1598		} else {
1599			emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect,
1600			                        view_mask);
1601		}
1602	}
1603}
1604
1605static inline bool
1606radv_attachment_needs_clear(struct radv_cmd_state *cmd_state, uint32_t a)
1607{
1608	uint32_t view_mask = cmd_state->subpass->view_mask;
1609	return (a != VK_ATTACHMENT_UNUSED &&
1610		cmd_state->attachments[a].pending_clear_aspects &&
1611		(!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views)));
1612}
1613
1614static bool
1615radv_subpass_needs_clear(struct radv_cmd_buffer *cmd_buffer)
1616{
1617	struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1618	uint32_t a;
1619
1620	if (!cmd_state->subpass)
1621		return false;
1622
1623	for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
1624		a = cmd_state->subpass->color_attachments[i].attachment;
1625		if (radv_attachment_needs_clear(cmd_state, a))
1626			return true;
1627	}
1628
1629	if (!cmd_state->subpass->depth_stencil_attachment)
1630		return false;
1631
1632	a = cmd_state->subpass->depth_stencil_attachment->attachment;
1633	return radv_attachment_needs_clear(cmd_state, a);
1634}
1635
1636static void
1637radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
1638			      struct radv_attachment_state *attachment,
1639			      const VkClearAttachment *clear_att,
1640			      enum radv_cmd_flush_bits *pre_flush,
1641			      enum radv_cmd_flush_bits *post_flush)
1642{
1643	struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1644	uint32_t view_mask = cmd_state->subpass->view_mask;
1645
1646	VkClearRect clear_rect = {
1647		.rect = cmd_state->render_area,
1648		.baseArrayLayer = 0,
1649		.layerCount = cmd_state->framebuffer->layers,
1650	};
1651
1652	emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
1653		   view_mask & ~attachment->cleared_views);
1654	if (view_mask)
1655		attachment->cleared_views |= view_mask;
1656	else
1657		attachment->pending_clear_aspects = 0;
1658}
1659
1660/**
1661 * Emit any pending attachment clears for the current subpass.
1662 *
1663 * @see radv_attachment_state::pending_clear_aspects
1664 */
1665void
1666radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
1667{
1668	struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1669	struct radv_meta_saved_state saved_state;
1670	enum radv_cmd_flush_bits pre_flush = 0;
1671	enum radv_cmd_flush_bits post_flush = 0;
1672
1673	if (!radv_subpass_needs_clear(cmd_buffer))
1674		return;
1675
1676	radv_meta_save(&saved_state, cmd_buffer,
1677		       RADV_META_SAVE_GRAPHICS_PIPELINE |
1678		       RADV_META_SAVE_CONSTANTS);
1679
1680	for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
1681		uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
1682
1683		if (!radv_attachment_needs_clear(cmd_state, a))
1684			continue;
1685
1686		assert(cmd_state->attachments[a].pending_clear_aspects ==
1687		       VK_IMAGE_ASPECT_COLOR_BIT);
1688
1689		VkClearAttachment clear_att = {
1690			.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1691			.colorAttachment = i, /* Use attachment index relative to subpass */
1692			.clearValue = cmd_state->attachments[a].clear_value,
1693		};
1694
1695		radv_subpass_clear_attachment(cmd_buffer,
1696					      &cmd_state->attachments[a],
1697					      &clear_att, &pre_flush,
1698					      &post_flush);
1699	}
1700
1701	if (cmd_state->subpass->depth_stencil_attachment) {
1702		uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment;
1703		if (radv_attachment_needs_clear(cmd_state, ds)) {
1704			VkClearAttachment clear_att = {
1705				.aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
1706				.clearValue = cmd_state->attachments[ds].clear_value,
1707			};
1708
1709			radv_subpass_clear_attachment(cmd_buffer,
1710						      &cmd_state->attachments[ds],
1711						      &clear_att, &pre_flush,
1712						      &post_flush);
1713		}
1714	}
1715
1716	radv_meta_restore(&saved_state, cmd_buffer);
1717	cmd_buffer->state.flush_bits |= post_flush;
1718}
1719
1720static void
1721radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
1722		       struct radv_image *image,
1723		       VkImageLayout image_layout,
1724		       const VkImageSubresourceRange *range,
1725		       VkFormat format, int level, int layer,
1726		       const VkClearValue *clear_val)
1727{
1728	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
1729	struct radv_image_view iview;
1730	uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
1731	uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);
1732
1733	radv_image_view_init(&iview, cmd_buffer->device,
1734			     &(VkImageViewCreateInfo) {
1735				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1736					     .image = radv_image_to_handle(image),
1737					     .viewType = radv_meta_get_view_type(image),
1738					     .format = format,
1739					     .subresourceRange = {
1740					     .aspectMask = range->aspectMask,
1741					     .baseMipLevel = range->baseMipLevel + level,
1742					     .levelCount = 1,
1743					     .baseArrayLayer = range->baseArrayLayer + layer,
1744					     .layerCount = 1
1745				     },
1746			     });
1747
1748	VkFramebuffer fb;
1749	radv_CreateFramebuffer(device_h,
1750			       &(VkFramebufferCreateInfo) {
1751				       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
1752					       .attachmentCount = 1,
1753					       .pAttachments = (VkImageView[]) {
1754					       radv_image_view_to_handle(&iview),
1755				       },
1756					       .width = width,
1757					       .height = height,
1758					       .layers = 1
1759			       },
1760			       &cmd_buffer->pool->alloc,
1761			       &fb);
1762
1763	VkAttachmentDescription att_desc = {
1764		.format = iview.vk_format,
1765		.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
1766		.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
1767		.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
1768		.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
1769		.initialLayout = image_layout,
1770		.finalLayout = image_layout,
1771	};
1772
1773	VkSubpassDescription subpass_desc = {
1774		.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
1775		.inputAttachmentCount = 0,
1776		.colorAttachmentCount = 0,
1777		.pColorAttachments = NULL,
1778		.pResolveAttachments = NULL,
1779		.pDepthStencilAttachment = NULL,
1780		.preserveAttachmentCount = 0,
1781		.pPreserveAttachments = NULL,
1782	};
1783
1784	const VkAttachmentReference att_ref = {
1785		.attachment = 0,
1786		.layout = image_layout,
1787	};
1788
1789	if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1790		subpass_desc.colorAttachmentCount = 1;
1791		subpass_desc.pColorAttachments = &att_ref;
1792	} else {
1793		subpass_desc.pDepthStencilAttachment = &att_ref;
1794	}
1795
1796	VkRenderPass pass;
1797	radv_CreateRenderPass(device_h,
1798			      &(VkRenderPassCreateInfo) {
1799				      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
1800					      .attachmentCount = 1,
1801					      .pAttachments = &att_desc,
1802					      .subpassCount = 1,
1803					      .pSubpasses = &subpass_desc,
1804					      },
1805			      &cmd_buffer->pool->alloc,
1806			      &pass);
1807
1808	radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
1809				&(VkRenderPassBeginInfo) {
1810					.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
1811						.renderArea = {
1812						.offset = { 0, 0, },
1813						.extent = {
1814							.width = width,
1815							.height = height,
1816						},
1817					},
1818						.renderPass = pass,
1819						.framebuffer = fb,
1820						.clearValueCount = 0,
1821						.pClearValues = NULL,
1822						},
1823				VK_SUBPASS_CONTENTS_INLINE);
1824
1825	VkClearAttachment clear_att = {
1826		.aspectMask = range->aspectMask,
1827		.colorAttachment = 0,
1828		.clearValue = *clear_val,
1829	};
1830
1831	VkClearRect clear_rect = {
1832		.rect = {
1833			.offset = { 0, 0 },
1834			.extent = { width, height },
1835		},
1836		.baseArrayLayer = range->baseArrayLayer,
1837		.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
1838	};
1839
1840	emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0);
1841
1842	radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
1843	radv_DestroyRenderPass(device_h, pass,
1844			       &cmd_buffer->pool->alloc);
1845	radv_DestroyFramebuffer(device_h, fb,
1846				&cmd_buffer->pool->alloc);
1847}
1848
1849/**
1850 * Return TRUE if a fast color or depth clear has been performed.
1851 */
1852static bool
1853radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer,
1854		      struct radv_image *image,
1855		      VkFormat format,
1856		      VkImageLayout image_layout,
1857		      const VkImageSubresourceRange *range,
1858		      const VkClearValue *clear_val)
1859{
1860	struct radv_image_view iview;
1861
1862	radv_image_view_init(&iview, cmd_buffer->device,
1863			     &(VkImageViewCreateInfo) {
1864					.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1865					.image = radv_image_to_handle(image),
1866					.viewType = radv_meta_get_view_type(image),
1867					.format = image->vk_format,
1868					.subresourceRange = {
1869					.aspectMask = range->aspectMask,
1870					.baseMipLevel = range->baseMipLevel,
1871					.levelCount = range->levelCount,
1872					.baseArrayLayer = range->baseArrayLayer,
1873					.layerCount = range->layerCount,
1874				   },
1875			     });
1876
1877	VkClearRect clear_rect = {
1878		.rect = {
1879			.offset = { 0, 0 },
1880			.extent = {
1881				radv_minify(image->info.width, range->baseMipLevel),
1882				radv_minify(image->info.height, range->baseMipLevel),
1883			},
1884		},
1885		.baseArrayLayer = range->baseArrayLayer,
1886		.layerCount = range->layerCount,
1887	};
1888
1889	VkClearAttachment clear_att = {
1890		.aspectMask = range->aspectMask,
1891		.colorAttachment = 0,
1892		.clearValue = *clear_val,
1893	};
1894
1895	if (vk_format_is_color(format)) {
1896		if (radv_can_fast_clear_color(cmd_buffer, &iview,
1897					      image_layout, &clear_rect,
1898					      clear_att.clearValue.color, 0)) {
1899			radv_fast_clear_color(cmd_buffer, &iview, &clear_att,
1900					      clear_att.colorAttachment,
1901					      NULL, NULL);
1902			return true;
1903		}
1904	} else {
1905		if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout,
1906					      range->aspectMask, &clear_rect,
1907					      clear_att.clearValue.depthStencil, 0)) {
1908			radv_fast_clear_depth(cmd_buffer, &iview, &clear_att,
1909			                      NULL, NULL);
1910			return true;
1911		}
1912	}
1913
1914	return false;
1915}
1916
1917static void
1918radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
1919		     struct radv_image *image,
1920		     VkImageLayout image_layout,
1921		     const VkClearValue *clear_value,
1922		     uint32_t range_count,
1923		     const VkImageSubresourceRange *ranges,
1924		     bool cs)
1925{
1926	VkFormat format = image->vk_format;
1927	VkClearValue internal_clear_value = *clear_value;
1928
1929	if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
1930		uint32_t value;
1931		format = VK_FORMAT_R32_UINT;
1932		value = float3_to_rgb9e5(clear_value->color.float32);
1933		internal_clear_value.color.uint32[0] = value;
1934	}
1935
1936	if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
1937		uint8_t r, g;
1938		format = VK_FORMAT_R8_UINT;
1939		r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
1940		g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
1941		internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
1942	}
1943
1944	if (format == VK_FORMAT_R32G32B32_UINT ||
1945	    format == VK_FORMAT_R32G32B32_SINT ||
1946	    format == VK_FORMAT_R32G32B32_SFLOAT)
1947		cs = true;
1948
1949	for (uint32_t r = 0; r < range_count; r++) {
1950		const VkImageSubresourceRange *range = &ranges[r];
1951
1952		/* Try to perform a fast clear first, otherwise fallback to
1953		 * the legacy path.
1954		 */
1955		if (!cs &&
1956		    radv_fast_clear_range(cmd_buffer, image, format,
1957					  image_layout, range,
1958					  &internal_clear_value)) {
1959			continue;
1960		}
1961
1962		for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
1963			const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ?
1964				radv_minify(image->info.depth, range->baseMipLevel + l) :
1965				radv_get_layerCount(image, range);
1966			for (uint32_t s = 0; s < layer_count; ++s) {
1967
1968				if (cs) {
1969					struct radv_meta_blit2d_surf surf;
1970					surf.format = format;
1971					surf.image = image;
1972					surf.level = range->baseMipLevel + l;
1973					surf.layer = range->baseArrayLayer + s;
1974					surf.aspect_mask = range->aspectMask;
1975					radv_meta_clear_image_cs(cmd_buffer, &surf,
1976								 &internal_clear_value.color);
1977				} else {
1978					radv_clear_image_layer(cmd_buffer, image, image_layout,
1979							       range, format, l, s, &internal_clear_value);
1980				}
1981			}
1982		}
1983	}
1984}
1985
1986void radv_CmdClearColorImage(
1987	VkCommandBuffer                             commandBuffer,
1988	VkImage                                     image_h,
1989	VkImageLayout                               imageLayout,
1990	const VkClearColorValue*                    pColor,
1991	uint32_t                                    rangeCount,
1992	const VkImageSubresourceRange*              pRanges)
1993{
1994	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1995	RADV_FROM_HANDLE(radv_image, image, image_h);
1996	struct radv_meta_saved_state saved_state;
1997	bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
1998
1999	if (cs) {
2000		radv_meta_save(&saved_state, cmd_buffer,
2001			       RADV_META_SAVE_COMPUTE_PIPELINE |
2002			       RADV_META_SAVE_CONSTANTS |
2003			       RADV_META_SAVE_DESCRIPTORS);
2004	} else {
2005		radv_meta_save(&saved_state, cmd_buffer,
2006			       RADV_META_SAVE_GRAPHICS_PIPELINE |
2007			       RADV_META_SAVE_CONSTANTS);
2008	}
2009
2010	radv_cmd_clear_image(cmd_buffer, image, imageLayout,
2011			     (const VkClearValue *) pColor,
2012			     rangeCount, pRanges, cs);
2013
2014	radv_meta_restore(&saved_state, cmd_buffer);
2015}
2016
2017void radv_CmdClearDepthStencilImage(
2018	VkCommandBuffer                             commandBuffer,
2019	VkImage                                     image_h,
2020	VkImageLayout                               imageLayout,
2021	const VkClearDepthStencilValue*             pDepthStencil,
2022	uint32_t                                    rangeCount,
2023	const VkImageSubresourceRange*              pRanges)
2024{
2025	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2026	RADV_FROM_HANDLE(radv_image, image, image_h);
2027	struct radv_meta_saved_state saved_state;
2028
2029	radv_meta_save(&saved_state, cmd_buffer,
2030		       RADV_META_SAVE_GRAPHICS_PIPELINE |
2031		       RADV_META_SAVE_CONSTANTS);
2032
2033	radv_cmd_clear_image(cmd_buffer, image, imageLayout,
2034			     (const VkClearValue *) pDepthStencil,
2035			     rangeCount, pRanges, false);
2036
2037	radv_meta_restore(&saved_state, cmd_buffer);
2038}
2039
2040void radv_CmdClearAttachments(
2041	VkCommandBuffer                             commandBuffer,
2042	uint32_t                                    attachmentCount,
2043	const VkClearAttachment*                    pAttachments,
2044	uint32_t                                    rectCount,
2045	const VkClearRect*                          pRects)
2046{
2047	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2048	struct radv_meta_saved_state saved_state;
2049	enum radv_cmd_flush_bits pre_flush = 0;
2050	enum radv_cmd_flush_bits post_flush = 0;
2051
2052	if (!cmd_buffer->state.subpass)
2053		return;
2054
2055	radv_meta_save(&saved_state, cmd_buffer,
2056		       RADV_META_SAVE_GRAPHICS_PIPELINE |
2057		       RADV_META_SAVE_CONSTANTS);
2058
2059	/* FINISHME: We can do better than this dumb loop. It thrashes too much
2060	 * state.
2061	 */
2062	for (uint32_t a = 0; a < attachmentCount; ++a) {
2063		for (uint32_t r = 0; r < rectCount; ++r) {
2064			emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
2065			           cmd_buffer->state.subpass->view_mask);
2066		}
2067	}
2068
2069	radv_meta_restore(&saved_state, cmd_buffer);
2070	cmd_buffer->state.flush_bits |= post_flush;
2071}
2072