1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26
27#include "radv_meta.h"
28#include "radv_private.h"
29#include "sid.h"
30
31
32static nir_shader *
33build_dcc_decompress_compute_shader(struct radv_device *dev)
34{
35	nir_builder b;
36	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
37							     false,
38							     false,
39							     GLSL_TYPE_FLOAT);
40	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
41							     false,
42							     false,
43							     GLSL_TYPE_FLOAT);
44	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
45	b.shader->info.name = ralloc_strdup(b.shader, "dcc_decompress_compute");
46
47	/* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
48	b.shader->info.cs.local_size[0] = 16;
49	b.shader->info.cs.local_size[1] = 16;
50	b.shader->info.cs.local_size[2] = 1;
51	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
52						      buf_type, "s_tex");
53	input_img->data.descriptor_set = 0;
54	input_img->data.binding = 0;
55
56	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
57						       img_type, "out_img");
58	output_img->data.descriptor_set = 0;
59	output_img->data.binding = 1;
60
61	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
62	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
63	nir_ssa_def *block_size = nir_imm_ivec4(&b,
64						b.shader->info.cs.local_size[0],
65						b.shader->info.cs.local_size[1],
66						b.shader->info.cs.local_size[2], 0);
67
68	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
69	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
70
71	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
72	tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
73	tex->op = nir_texop_txf;
74	tex->src[0].src_type = nir_tex_src_coord;
75	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 3));
76	tex->src[1].src_type = nir_tex_src_lod;
77	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
78	tex->src[2].src_type = nir_tex_src_texture_deref;
79	tex->src[2].src = nir_src_for_ssa(input_img_deref);
80	tex->dest_type = nir_type_float;
81	tex->is_array = false;
82	tex->coord_components = 2;
83
84	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
85	nir_builder_instr_insert(&b, &tex->instr);
86
87	nir_intrinsic_instr *membar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_memory_barrier);
88	nir_builder_instr_insert(&b, &membar->instr);
89
90	nir_intrinsic_instr *bar = nir_intrinsic_instr_create(b.shader, nir_intrinsic_barrier);
91	nir_builder_instr_insert(&b, &bar->instr);
92
93	nir_ssa_def *outval = &tex->dest.ssa;
94	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
95	store->num_components = 4;
96	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
97	store->src[1] = nir_src_for_ssa(global_id);
98	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
99	store->src[3] = nir_src_for_ssa(outval);
100
101	nir_builder_instr_insert(&b, &store->instr);
102	return b.shader;
103}
104
105static VkResult
106create_dcc_compress_compute(struct radv_device *device)
107{
108	VkResult result = VK_SUCCESS;
109	struct radv_shader_module cs = { .nir = NULL };
110
111	cs.nir = build_dcc_decompress_compute_shader(device);
112
113	VkDescriptorSetLayoutCreateInfo ds_create_info = {
114		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
115		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
116		.bindingCount = 2,
117		.pBindings = (VkDescriptorSetLayoutBinding[]) {
118			{
119				.binding = 0,
120				.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
121				.descriptorCount = 1,
122				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
123				.pImmutableSamplers = NULL
124			},
125			{
126				.binding = 1,
127				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
128				.descriptorCount = 1,
129				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
130				.pImmutableSamplers = NULL
131			},
132		}
133	};
134
135	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
136						&ds_create_info,
137						&device->meta_state.alloc,
138						&device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
139	if (result != VK_SUCCESS)
140		goto cleanup;
141
142
143	VkPipelineLayoutCreateInfo pl_create_info = {
144		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
145		.setLayoutCount = 1,
146		.pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
147		.pushConstantRangeCount = 1,
148		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 8},
149	};
150
151	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
152					  &pl_create_info,
153					  &device->meta_state.alloc,
154					  &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
155	if (result != VK_SUCCESS)
156		goto cleanup;
157
158	/* compute shader */
159
160	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
161		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
162		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
163		.module = radv_shader_module_to_handle(&cs),
164		.pName = "main",
165		.pSpecializationInfo = NULL,
166	};
167
168	VkComputePipelineCreateInfo vk_pipeline_info = {
169		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
170		.stage = pipeline_shader_stage,
171		.flags = 0,
172		.layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
173	};
174
175	result = radv_CreateComputePipelines(radv_device_to_handle(device),
176					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
177					     1, &vk_pipeline_info, NULL,
178					     &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
179	if (result != VK_SUCCESS)
180		goto cleanup;
181
182cleanup:
183	ralloc_free(cs.nir);
184	return result;
185}
186
187static VkResult
188create_pass(struct radv_device *device)
189{
190	VkResult result;
191	VkDevice device_h = radv_device_to_handle(device);
192	const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
193	VkAttachmentDescription attachment;
194
195	attachment.format = VK_FORMAT_UNDEFINED;
196	attachment.samples = 1;
197	attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
198	attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
199	attachment.initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
200	attachment.finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
201
202	result = radv_CreateRenderPass(device_h,
203				       &(VkRenderPassCreateInfo) {
204					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
205						       .attachmentCount = 1,
206						       .pAttachments = &attachment,
207						       .subpassCount = 1,
208						       .pSubpasses = &(VkSubpassDescription) {
209						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
210						       .inputAttachmentCount = 0,
211						       .colorAttachmentCount = 1,
212						       .pColorAttachments = (VkAttachmentReference[]) {
213							       {
214								       .attachment = 0,
215								       .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
216							       },
217						       },
218						       .pResolveAttachments = NULL,
219						       .pDepthStencilAttachment = &(VkAttachmentReference) {
220							       .attachment = VK_ATTACHMENT_UNUSED,
221						       },
222						       .preserveAttachmentCount = 0,
223						       .pPreserveAttachments = NULL,
224					       },
225								.dependencyCount = 0,
226				       },
227				       alloc,
228				       &device->meta_state.fast_clear_flush.pass);
229
230	return result;
231}
232
233static VkResult
234create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
235{
236	VkPipelineLayoutCreateInfo pl_create_info = {
237		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
238		.setLayoutCount = 0,
239		.pSetLayouts = NULL,
240		.pushConstantRangeCount = 0,
241		.pPushConstantRanges = NULL,
242	};
243
244	return radv_CreatePipelineLayout(radv_device_to_handle(device),
245					 &pl_create_info,
246					 &device->meta_state.alloc,
247					 layout);
248}
249
250static VkResult
251create_pipeline(struct radv_device *device,
252		VkShaderModule vs_module_h,
253		VkPipelineLayout layout)
254{
255	VkResult result;
256	VkDevice device_h = radv_device_to_handle(device);
257
258	struct radv_shader_module fs_module = {
259		.nir = radv_meta_build_nir_fs_noop(),
260	};
261
262	if (!fs_module.nir) {
263		/* XXX: Need more accurate error */
264		result = VK_ERROR_OUT_OF_HOST_MEMORY;
265		goto cleanup;
266	}
267
268	const VkPipelineShaderStageCreateInfo stages[2] = {
269		{
270			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
271			.stage = VK_SHADER_STAGE_VERTEX_BIT,
272			.module = vs_module_h,
273			.pName = "main",
274		},
275		{
276			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
277			.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
278			.module = radv_shader_module_to_handle(&fs_module),
279			.pName = "main",
280		},
281	};
282
283	const VkPipelineVertexInputStateCreateInfo vi_state = {
284		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
285		.vertexBindingDescriptionCount = 0,
286		.vertexAttributeDescriptionCount = 0,
287	};
288
289	const VkPipelineInputAssemblyStateCreateInfo ia_state = {
290		.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
291		.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
292		.primitiveRestartEnable = false,
293	};
294
295	const VkPipelineColorBlendStateCreateInfo blend_state = {
296		.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
297		.logicOpEnable = false,
298		.attachmentCount = 1,
299		.pAttachments = (VkPipelineColorBlendAttachmentState []) {
300			{
301				.colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
302				VK_COLOR_COMPONENT_G_BIT |
303				VK_COLOR_COMPONENT_B_BIT |
304				VK_COLOR_COMPONENT_A_BIT,
305			},
306		}
307	};
308	const VkPipelineRasterizationStateCreateInfo rs_state = {
309		.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
310		.depthClampEnable = false,
311		.rasterizerDiscardEnable = false,
312		.polygonMode = VK_POLYGON_MODE_FILL,
313		.cullMode = VK_CULL_MODE_NONE,
314		.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
315	};
316
317	result = radv_graphics_pipeline_create(device_h,
318					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
319					       &(VkGraphicsPipelineCreateInfo) {
320						       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
321						       .stageCount = 2,
322						       .pStages = stages,
323
324						       .pVertexInputState = &vi_state,
325						       .pInputAssemblyState = &ia_state,
326
327					       .pViewportState = &(VkPipelineViewportStateCreateInfo) {
328						       .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
329						       .viewportCount = 1,
330						       .scissorCount = 1,
331					       },
332						       .pRasterizationState = &rs_state,
333					       .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
334						       .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
335						       .rasterizationSamples = 1,
336						       .sampleShadingEnable = false,
337						       .pSampleMask = NULL,
338						       .alphaToCoverageEnable = false,
339						       .alphaToOneEnable = false,
340					       },
341						.pColorBlendState = &blend_state,
342						.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
343							.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
344							.dynamicStateCount = 2,
345							.pDynamicStates = (VkDynamicState[]) {
346								VK_DYNAMIC_STATE_VIEWPORT,
347								VK_DYNAMIC_STATE_SCISSOR,
348							},
349						},
350					        .layout = layout,
351						.renderPass = device->meta_state.fast_clear_flush.pass,
352						.subpass = 0,
353					       },
354					       &(struct radv_graphics_pipeline_create_info) {
355						       .use_rectlist = true,
356						       .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
357					       },
358					       &device->meta_state.alloc,
359					       &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
360	if (result != VK_SUCCESS)
361		goto cleanup;
362
363	result = radv_graphics_pipeline_create(device_h,
364					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
365					       &(VkGraphicsPipelineCreateInfo) {
366						       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
367						       .stageCount = 2,
368						       .pStages = stages,
369
370						       .pVertexInputState = &vi_state,
371						       .pInputAssemblyState = &ia_state,
372
373					       .pViewportState = &(VkPipelineViewportStateCreateInfo) {
374						       .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
375						       .viewportCount = 1,
376						       .scissorCount = 1,
377					       },
378						       .pRasterizationState = &rs_state,
379					       .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
380						       .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
381						       .rasterizationSamples = 1,
382						       .sampleShadingEnable = false,
383						       .pSampleMask = NULL,
384						       .alphaToCoverageEnable = false,
385						       .alphaToOneEnable = false,
386					       },
387						.pColorBlendState = &blend_state,
388						.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
389							.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
390							.dynamicStateCount = 2,
391							.pDynamicStates = (VkDynamicState[]) {
392								VK_DYNAMIC_STATE_VIEWPORT,
393								VK_DYNAMIC_STATE_SCISSOR,
394							},
395						},
396						.layout = layout,
397						.renderPass = device->meta_state.fast_clear_flush.pass,
398						.subpass = 0,
399					       },
400					       &(struct radv_graphics_pipeline_create_info) {
401						       .use_rectlist = true,
402						       .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
403					       },
404					       &device->meta_state.alloc,
405					       &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
406	if (result != VK_SUCCESS)
407		goto cleanup;
408
409	result = radv_graphics_pipeline_create(device_h,
410					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
411					       &(VkGraphicsPipelineCreateInfo) {
412						       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
413						       .stageCount = 2,
414						       .pStages = stages,
415
416						       .pVertexInputState = &vi_state,
417						       .pInputAssemblyState = &ia_state,
418
419					       .pViewportState = &(VkPipelineViewportStateCreateInfo) {
420						       .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
421						       .viewportCount = 1,
422						       .scissorCount = 1,
423					       },
424						       .pRasterizationState = &rs_state,
425					       .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
426						       .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
427						       .rasterizationSamples = 1,
428						       .sampleShadingEnable = false,
429						       .pSampleMask = NULL,
430						       .alphaToCoverageEnable = false,
431						       .alphaToOneEnable = false,
432					       },
433						.pColorBlendState = &blend_state,
434						.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
435							.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
436							.dynamicStateCount = 2,
437							.pDynamicStates = (VkDynamicState[]) {
438								VK_DYNAMIC_STATE_VIEWPORT,
439								VK_DYNAMIC_STATE_SCISSOR,
440							},
441						},
442						.layout = layout,
443						.renderPass = device->meta_state.fast_clear_flush.pass,
444						.subpass = 0,
445					       },
446					       &(struct radv_graphics_pipeline_create_info) {
447						       .use_rectlist = true,
448						       .custom_blend_mode = V_028808_CB_DCC_DECOMPRESS,
449					       },
450					       &device->meta_state.alloc,
451					       &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
452	if (result != VK_SUCCESS)
453		goto cleanup;
454
455	goto cleanup;
456
457cleanup:
458	ralloc_free(fs_module.nir);
459	return result;
460}
461
462void
463radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
464{
465	struct radv_meta_state *state = &device->meta_state;
466
467	radv_DestroyPipeline(radv_device_to_handle(device),
468			     state->fast_clear_flush.dcc_decompress_pipeline,
469			     &state->alloc);
470	radv_DestroyPipeline(radv_device_to_handle(device),
471			     state->fast_clear_flush.fmask_decompress_pipeline,
472			     &state->alloc);
473	radv_DestroyPipeline(radv_device_to_handle(device),
474			     state->fast_clear_flush.cmask_eliminate_pipeline,
475			     &state->alloc);
476	radv_DestroyRenderPass(radv_device_to_handle(device),
477			       state->fast_clear_flush.pass, &state->alloc);
478	radv_DestroyPipelineLayout(radv_device_to_handle(device),
479				   state->fast_clear_flush.p_layout,
480				   &state->alloc);
481
482	radv_DestroyPipeline(radv_device_to_handle(device),
483			     state->fast_clear_flush.dcc_decompress_compute_pipeline,
484			     &state->alloc);
485	radv_DestroyPipelineLayout(radv_device_to_handle(device),
486				   state->fast_clear_flush.dcc_decompress_compute_p_layout,
487				   &state->alloc);
488	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
489	                                state->fast_clear_flush.dcc_decompress_compute_ds_layout,
490	                                &state->alloc);
491}
492
493static VkResult
494radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
495{
496	VkResult res = VK_SUCCESS;
497
498	mtx_lock(&device->meta_state.mtx);
499	if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
500		mtx_unlock(&device->meta_state.mtx);
501		return VK_SUCCESS;
502	}
503
504	struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
505	if (!vs_module.nir) {
506		/* XXX: Need more accurate error */
507		res = VK_ERROR_OUT_OF_HOST_MEMORY;
508		goto fail;
509	}
510
511	res = create_pass(device);
512	if (res != VK_SUCCESS)
513		goto fail;
514
515	res = create_pipeline_layout(device,
516				     &device->meta_state.fast_clear_flush.p_layout);
517	if (res != VK_SUCCESS)
518		goto fail;
519
520	VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
521	res = create_pipeline(device, vs_module_h,
522			      device->meta_state.fast_clear_flush.p_layout);
523	if (res != VK_SUCCESS)
524		goto fail;
525
526	res = create_dcc_compress_compute(device);
527	if (res != VK_SUCCESS)
528		goto fail;
529
530	goto cleanup;
531
532fail:
533	radv_device_finish_meta_fast_clear_flush_state(device);
534
535cleanup:
536	ralloc_free(vs_module.nir);
537	mtx_unlock(&device->meta_state.mtx);
538
539	return res;
540}
541
542
543VkResult
544radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
545{
546	if (on_demand)
547		return VK_SUCCESS;
548
549	return radv_device_init_meta_fast_clear_flush_state_internal(device);
550}
551
552static void
553radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
554				      struct radv_image *image,
555				      uint64_t pred_offset, bool value)
556{
557	uint64_t va = 0;
558
559	if (value) {
560		va = radv_buffer_get_va(image->bo) + image->offset;
561		va += pred_offset;
562	}
563
564	si_emit_set_predication_state(cmd_buffer, true, va);
565}
566
567/**
568 */
569static void
570radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
571                           struct radv_image *image,
572                           const VkImageSubresourceRange *subresourceRange,
573                           bool decompress_dcc)
574{
575	struct radv_meta_saved_state saved_state;
576	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
577	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
578	uint32_t layer_count = radv_get_layerCount(image, subresourceRange);
579	bool old_predicating = false;
580	VkPipeline pipeline;
581
582	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
583
584	if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
585		VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
586		if (ret != VK_SUCCESS) {
587			cmd_buffer->record_result = ret;
588			return;
589		}
590	}
591
592	radv_meta_save(&saved_state, cmd_buffer,
593		       RADV_META_SAVE_GRAPHICS_PIPELINE |
594		       RADV_META_SAVE_PASS);
595
596	if (decompress_dcc && radv_image_has_dcc(image)) {
597		pipeline = cmd_buffer->device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
598	} else if (radv_image_has_fmask(image)) {
599               pipeline = cmd_buffer->device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
600	} else {
601               pipeline = cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
602	}
603
604	if (radv_image_has_dcc(image)) {
605		uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset :
606							image->fce_pred_offset;
607
608		old_predicating = cmd_buffer->state.predicating;
609
610		radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
611		cmd_buffer->state.predicating = true;
612	}
613
614	radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
615			     pipeline);
616
617	radv_CmdSetViewport(cmd_buffer_h, 0, 1, &(VkViewport) {
618		.x = 0,
619		.y = 0,
620		.width = image->info.width,
621		.height = image->info.height,
622		.minDepth = 0.0f,
623		.maxDepth = 1.0f
624	});
625
626	radv_CmdSetScissor(cmd_buffer_h, 0, 1, &(VkRect2D) {
627		.offset = (VkOffset2D) { 0, 0 },
628		.extent = (VkExtent2D) { image->info.width, image->info.height },
629	});
630
631	for (uint32_t layer = 0; layer < layer_count; ++layer) {
632		struct radv_image_view iview;
633
634		radv_image_view_init(&iview, cmd_buffer->device,
635				     &(VkImageViewCreateInfo) {
636					     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
637					     .image = radv_image_to_handle(image),
638					     .viewType = radv_meta_get_view_type(image),
639					     .format = image->vk_format,
640					     .subresourceRange = {
641						     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
642						     .baseMipLevel = 0,
643						     .levelCount = 1,
644						     .baseArrayLayer = subresourceRange->baseArrayLayer + layer,
645						     .layerCount = 1,
646					      },
647				     });
648
649		VkFramebuffer fb_h;
650		radv_CreateFramebuffer(device_h,
651				&(VkFramebufferCreateInfo) {
652					.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
653					.attachmentCount = 1,
654					.pAttachments = (VkImageView[]) {
655						radv_image_view_to_handle(&iview)
656					},
657				       .width = image->info.width,
658				       .height = image->info.height,
659				       .layers = 1
660				},
661				&cmd_buffer->pool->alloc,
662				&fb_h);
663
664		radv_CmdBeginRenderPass(cmd_buffer_h,
665				      &(VkRenderPassBeginInfo) {
666					      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
667						      .renderPass = cmd_buffer->device->meta_state.fast_clear_flush.pass,
668						      .framebuffer = fb_h,
669						      .renderArea = {
670						      .offset = {
671							      0,
672							      0,
673						      },
674						      .extent = {
675							      image->info.width,
676							      image->info.height,
677						      }
678					      },
679					      .clearValueCount = 0,
680					      .pClearValues = NULL,
681				     },
682				     VK_SUBPASS_CONTENTS_INLINE);
683
684		radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
685
686		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
687						RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
688
689		radv_CmdEndRenderPass(cmd_buffer_h);
690
691		radv_DestroyFramebuffer(device_h, fb_h,
692					&cmd_buffer->pool->alloc);
693
694	}
695	if (radv_image_has_dcc(image)) {
696		uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset :
697							image->fce_pred_offset;
698
699		cmd_buffer->state.predicating = old_predicating;
700
701		radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
702
703		if (cmd_buffer->state.predication_type != -1) {
704			/* Restore previous conditional rendering user state. */
705			si_emit_set_predication_state(cmd_buffer,
706						      cmd_buffer->state.predication_type,
707						      cmd_buffer->state.predication_va);
708		}
709	}
710
711	if (radv_image_has_dcc(image)) {
712		/* Clear the image's fast-clear eliminate predicate because
713		 * FMASK and DCC also imply a fast-clear eliminate.
714		 */
715		radv_update_fce_metadata(cmd_buffer, image, false);
716
717		/* Mark the image as being decompressed. */
718		if (decompress_dcc)
719			radv_update_dcc_metadata(cmd_buffer, image, false);
720	}
721
722	radv_meta_restore(&saved_state, cmd_buffer);
723}
724
725void
726radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
727                                    struct radv_image *image,
728                                    const VkImageSubresourceRange *subresourceRange)
729{
730	radv_emit_color_decompress(cmd_buffer, image, subresourceRange, false);
731}
732
733static void
734radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer,
735                        struct radv_image *image,
736                        const VkImageSubresourceRange *subresourceRange)
737{
738	radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);
739}
740
741static void
742radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
743                            struct radv_image *image,
744                            const VkImageSubresourceRange *subresourceRange)
745{
746	struct radv_meta_saved_state saved_state;
747	struct radv_image_view iview = {0};
748	struct radv_device *device = cmd_buffer->device;
749
750	/* This assumes the image is 2d with 1 layer and 1 mipmap level */
751	struct radv_cmd_state *state = &cmd_buffer->state;
752
753	state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
754			     RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
755
756	radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS |
757	                                         RADV_META_SAVE_COMPUTE_PIPELINE);
758
759	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
760	                     VK_PIPELINE_BIND_POINT_COMPUTE,
761	                     device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
762
763	radv_image_view_init(&iview, cmd_buffer->device,
764			     &(VkImageViewCreateInfo) {
765				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
766					     .image = radv_image_to_handle(image),
767					     .viewType = VK_IMAGE_VIEW_TYPE_2D,
768					     .format = image->vk_format,
769					     .subresourceRange = {
770						.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
771						.baseMipLevel = 0,
772						.levelCount = 1,
773						.baseArrayLayer = 0,
774						.layerCount = 1
775					     },
776			     });
777
778	radv_meta_push_descriptor_set(cmd_buffer,
779				      VK_PIPELINE_BIND_POINT_COMPUTE,
780				      device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
781				      0, /* set */
782				      2, /* descriptorWriteCount */
783				      (VkWriteDescriptorSet[]) {
784				              {
785				                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
786				                       .dstBinding = 0,
787				                       .dstArrayElement = 0,
788				                       .descriptorCount = 1,
789				                       .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
790				                       .pImageInfo = (VkDescriptorImageInfo[]) {
791				                               {
792				                                       .sampler = VK_NULL_HANDLE,
793				                                       .imageView = radv_image_view_to_handle(&iview),
794				                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
795				                               },
796				                       }
797				              },
798				              {
799				                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
800				                       .dstBinding = 1,
801				                       .dstArrayElement = 0,
802				                       .descriptorCount = 1,
803				                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
804				                       .pImageInfo = (VkDescriptorImageInfo[]) {
805				                               {
806				                                       .sampler = VK_NULL_HANDLE,
807				                                       .imageView = radv_image_view_to_handle(&iview),
808				                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
809				                               },
810				                       }
811				              }
812				      });
813
814	radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, 1);
815
816	/* Mark this image as actually being decompressed. */
817	radv_update_dcc_metadata(cmd_buffer, image, false);
818
819	/* The fill buffer below does its own saving */
820	radv_meta_restore(&saved_state, cmd_buffer);
821
822	state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
823			     RADV_CMD_FLAG_INV_VMEM_L1;
824
825	state->flush_bits |= radv_clear_dcc(cmd_buffer, image, 0xffffffff);
826
827	state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
828			     RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
829}
830
831void
832radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
833                    struct radv_image *image,
834                    const VkImageSubresourceRange *subresourceRange)
835{
836	if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
837		radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange);
838	else
839		radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
840}
841