radv_meta_decompress.c revision 01e04c3f
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26
27#include "radv_meta.h"
28#include "radv_private.h"
29#include "sid.h"
30
31static VkResult
32create_pass(struct radv_device *device,
33	    uint32_t samples,
34	    VkRenderPass *pass)
35{
36	VkResult result;
37	VkDevice device_h = radv_device_to_handle(device);
38	const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
39	VkAttachmentDescription attachment;
40
41	attachment.flags = 0;
42	attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
43	attachment.samples = samples;
44	attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
45	attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
46	attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
47	attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
48	attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
49	attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
50
51	result = radv_CreateRenderPass(device_h,
52				       &(VkRenderPassCreateInfo) {
53					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
54						       .attachmentCount = 1,
55						       .pAttachments = &attachment,
56						       .subpassCount = 1,
57							.pSubpasses = &(VkSubpassDescription) {
58						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
59						       .inputAttachmentCount = 0,
60						       .colorAttachmentCount = 0,
61						       .pColorAttachments = NULL,
62						       .pResolveAttachments = NULL,
63						       .pDepthStencilAttachment = &(VkAttachmentReference) {
64							       .attachment = 0,
65							       .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
66						       },
67						       .preserveAttachmentCount = 0,
68						       .pPreserveAttachments = NULL,
69					       },
70								.dependencyCount = 0,
71								   },
72				       alloc,
73				       pass);
74
75	return result;
76}
77
78static VkResult
79create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
80{
81	VkPipelineLayoutCreateInfo pl_create_info = {
82		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
83		.setLayoutCount = 0,
84		.pSetLayouts = NULL,
85		.pushConstantRangeCount = 0,
86		.pPushConstantRanges = NULL,
87	};
88
89	return radv_CreatePipelineLayout(radv_device_to_handle(device),
90					 &pl_create_info,
91					 &device->meta_state.alloc,
92					 layout);
93}
94
95static VkResult
96create_pipeline(struct radv_device *device,
97                VkShaderModule vs_module_h,
98		uint32_t samples,
99		VkRenderPass pass,
100		VkPipelineLayout layout,
101		VkPipeline *decompress_pipeline,
102		VkPipeline *resummarize_pipeline)
103{
104	VkResult result;
105	VkDevice device_h = radv_device_to_handle(device);
106	struct radv_shader_module vs_module = {0};
107
108	mtx_lock(&device->meta_state.mtx);
109	if (*decompress_pipeline) {
110		mtx_unlock(&device->meta_state.mtx);
111		return VK_SUCCESS;
112	}
113
114	if (!vs_module_h) {
115		vs_module.nir = radv_meta_build_nir_vs_generate_vertices();
116		vs_module_h = radv_shader_module_to_handle(&vs_module);
117	}
118
119	struct radv_shader_module fs_module = {
120		.nir = radv_meta_build_nir_fs_noop(),
121	};
122
123	if (!fs_module.nir) {
124		/* XXX: Need more accurate error */
125		result = VK_ERROR_OUT_OF_HOST_MEMORY;
126		goto cleanup;
127	}
128
129	const VkGraphicsPipelineCreateInfo pipeline_create_info = {
130		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
131		.stageCount = 2,
132		.pStages = (VkPipelineShaderStageCreateInfo[]) {
133		       {
134				.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
135				.stage = VK_SHADER_STAGE_VERTEX_BIT,
136				.module = vs_module_h,
137				.pName = "main",
138			},
139			{
140				.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
141				.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
142				.module = radv_shader_module_to_handle(&fs_module),
143				.pName = "main",
144			},
145		},
146		.pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
147			.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
148			.vertexBindingDescriptionCount = 0,
149			.vertexAttributeDescriptionCount = 0,
150		},
151		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
152			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
153			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
154			.primitiveRestartEnable = false,
155		},
156		.pViewportState = &(VkPipelineViewportStateCreateInfo) {
157			.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
158			.viewportCount = 1,
159			.scissorCount = 1,
160		},
161		.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
162			.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
163			.depthClampEnable = false,
164			.rasterizerDiscardEnable = false,
165			.polygonMode = VK_POLYGON_MODE_FILL,
166			.cullMode = VK_CULL_MODE_NONE,
167			.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
168		},
169		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
170			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
171			.rasterizationSamples = samples,
172			.sampleShadingEnable = false,
173			.pSampleMask = NULL,
174			.alphaToCoverageEnable = false,
175			.alphaToOneEnable = false,
176		},
177		.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
178			.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
179			.logicOpEnable = false,
180			.attachmentCount = 0,
181			.pAttachments = NULL,
182		},
183		.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
184			.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
185			.depthTestEnable = false,
186			.depthWriteEnable = false,
187			.depthBoundsTestEnable = false,
188			.stencilTestEnable = false,
189		},
190		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
191			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
192			.dynamicStateCount = 2,
193			.pDynamicStates = (VkDynamicState[]) {
194				VK_DYNAMIC_STATE_VIEWPORT,
195				VK_DYNAMIC_STATE_SCISSOR,
196			},
197		},
198		.layout = layout,
199		.renderPass = pass,
200		.subpass = 0,
201	};
202
203	result = radv_graphics_pipeline_create(device_h,
204					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
205					       &pipeline_create_info,
206					       &(struct radv_graphics_pipeline_create_info) {
207							.use_rectlist = true,
208							.db_flush_depth_inplace = true,
209							.db_flush_stencil_inplace = true,
210					       },
211					       &device->meta_state.alloc,
212					       decompress_pipeline);
213	if (result != VK_SUCCESS)
214		goto cleanup;
215
216	result = radv_graphics_pipeline_create(device_h,
217					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
218					       &pipeline_create_info,
219					       &(struct radv_graphics_pipeline_create_info) {
220							.use_rectlist = true,
221							.db_flush_depth_inplace = true,
222							.db_flush_stencil_inplace = true,
223							.db_resummarize = true,
224					       },
225					       &device->meta_state.alloc,
226					       resummarize_pipeline);
227	if (result != VK_SUCCESS)
228		goto cleanup;
229
230	goto cleanup;
231
232cleanup:
233	ralloc_free(fs_module.nir);
234	if (vs_module.nir)
235		ralloc_free(vs_module.nir);
236	mtx_unlock(&device->meta_state.mtx);
237	return result;
238}
239
240void
241radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
242{
243	struct radv_meta_state *state = &device->meta_state;
244
245	for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
246		radv_DestroyRenderPass(radv_device_to_handle(device),
247				       state->depth_decomp[i].pass,
248				       &state->alloc);
249		radv_DestroyPipelineLayout(radv_device_to_handle(device),
250					   state->depth_decomp[i].p_layout,
251					   &state->alloc);
252		radv_DestroyPipeline(radv_device_to_handle(device),
253				     state->depth_decomp[i].decompress_pipeline,
254				     &state->alloc);
255		radv_DestroyPipeline(radv_device_to_handle(device),
256				     state->depth_decomp[i].resummarize_pipeline,
257				     &state->alloc);
258	}
259}
260
261VkResult
262radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
263{
264	struct radv_meta_state *state = &device->meta_state;
265	VkResult res = VK_SUCCESS;
266
267	struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
268	if (!vs_module.nir) {
269		/* XXX: Need more accurate error */
270		res = VK_ERROR_OUT_OF_HOST_MEMORY;
271		goto fail;
272	}
273
274	VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
275
276	for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
277		uint32_t samples = 1 << i;
278
279		res = create_pass(device, samples, &state->depth_decomp[i].pass);
280		if (res != VK_SUCCESS)
281			goto fail;
282
283		res = create_pipeline_layout(device,
284					     &state->depth_decomp[i].p_layout);
285		if (res != VK_SUCCESS)
286			goto fail;
287
288		if (on_demand)
289			continue;
290
291		res = create_pipeline(device, vs_module_h, samples,
292				      state->depth_decomp[i].pass,
293				      state->depth_decomp[i].p_layout,
294				      &state->depth_decomp[i].decompress_pipeline,
295				      &state->depth_decomp[i].resummarize_pipeline);
296		if (res != VK_SUCCESS)
297			goto fail;
298	}
299
300	goto cleanup;
301
302fail:
303	radv_device_finish_meta_depth_decomp_state(device);
304
305cleanup:
306	ralloc_free(vs_module.nir);
307
308	return res;
309}
310
311static void
312emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
313		  const VkExtent2D *depth_decomp_extent,
314		  VkPipeline pipeline_h)
315{
316	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
317
318	radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
319			     pipeline_h);
320
321	radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
322		.x = 0,
323		.y = 0,
324		.width = depth_decomp_extent->width,
325		.height = depth_decomp_extent->height,
326		.minDepth = 0.0f,
327		.maxDepth = 1.0f
328	});
329
330	radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
331		.offset = { 0, 0 },
332		.extent = *depth_decomp_extent,
333	});
334
335	radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
336}
337
338
/* Selects which of the two per-sample-count pipelines is used. */
enum radv_depth_op {
	/* Use the decompress pipeline. */
	DEPTH_DECOMPRESS = 0,
	/* Use the resummarize pipeline. */
	DEPTH_RESUMMARIZE = 1,
};
343
344static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
345					     struct radv_image *image,
346					     VkImageSubresourceRange *subresourceRange,
347					     enum radv_depth_op op)
348{
349	struct radv_meta_saved_state saved_state;
350	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
351	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
352	uint32_t width = radv_minify(image->info.width,
353				     subresourceRange->baseMipLevel);
354	uint32_t height = radv_minify(image->info.height,
355				     subresourceRange->baseMipLevel);
356	uint32_t samples = image->info.samples;
357	uint32_t samples_log2 = ffs(samples) - 1;
358	struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
359	VkPipeline pipeline_h;
360
361	if (!radv_image_has_htile(image))
362		return;
363
364	if (!meta_state->depth_decomp[samples_log2].decompress_pipeline) {
365		VkResult ret = create_pipeline(cmd_buffer->device, VK_NULL_HANDLE, samples,
366		                               meta_state->depth_decomp[samples_log2].pass,
367		                               meta_state->depth_decomp[samples_log2].p_layout,
368		                               &meta_state->depth_decomp[samples_log2].decompress_pipeline,
369		                               &meta_state->depth_decomp[samples_log2].resummarize_pipeline);
370		if (ret != VK_SUCCESS) {
371			cmd_buffer->record_result = ret;
372			return;
373		}
374	}
375
376	radv_meta_save(&saved_state, cmd_buffer,
377		       RADV_META_SAVE_GRAPHICS_PIPELINE |
378		       RADV_META_SAVE_PASS);
379
380	switch (op) {
381	case DEPTH_DECOMPRESS:
382		pipeline_h = meta_state->depth_decomp[samples_log2].decompress_pipeline;
383		break;
384	case DEPTH_RESUMMARIZE:
385		pipeline_h = meta_state->depth_decomp[samples_log2].resummarize_pipeline;
386		break;
387	default:
388		unreachable("unknown operation");
389	}
390
391	for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) {
392		struct radv_image_view iview;
393
394		radv_image_view_init(&iview, cmd_buffer->device,
395				     &(VkImageViewCreateInfo) {
396					     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
397					     .image = radv_image_to_handle(image),
398					     .viewType = radv_meta_get_view_type(image),
399					     .format = image->vk_format,
400					     .subresourceRange = {
401						     .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
402						     .baseMipLevel = subresourceRange->baseMipLevel,
403						     .levelCount = 1,
404						     .baseArrayLayer = subresourceRange->baseArrayLayer + layer,
405						     .layerCount = 1,
406					     },
407				     });
408
409
410		VkFramebuffer fb_h;
411		radv_CreateFramebuffer(device_h,
412				       &(VkFramebufferCreateInfo) {
413					       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
414					       .attachmentCount = 1,
415						       .pAttachments = (VkImageView[]) {
416						       radv_image_view_to_handle(&iview)
417					       },
418					       .width = width,
419						.height = height,
420					       .layers = 1
421				       },
422				       &cmd_buffer->pool->alloc,
423				       &fb_h);
424
425		radv_CmdBeginRenderPass(cmd_buffer_h,
426					      &(VkRenderPassBeginInfo) {
427						      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
428							      .renderPass = meta_state->depth_decomp[samples_log2].pass,
429							      .framebuffer = fb_h,
430							      .renderArea = {
431							      .offset = {
432								      0,
433								      0,
434							      },
435							      .extent = {
436								      width,
437								      height,
438							      }
439						       },
440						       .clearValueCount = 0,
441						       .pClearValues = NULL,
442					   },
443					   VK_SUBPASS_CONTENTS_INLINE);
444
445		emit_depth_decomp(cmd_buffer, &(VkExtent2D){width, height}, pipeline_h);
446		radv_CmdEndRenderPass(cmd_buffer_h);
447
448		radv_DestroyFramebuffer(device_h, fb_h,
449					&cmd_buffer->pool->alloc);
450	}
451	radv_meta_restore(&saved_state, cmd_buffer);
452}
453
454void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
455					 struct radv_image *image,
456					 VkImageSubresourceRange *subresourceRange)
457{
458	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
459	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_DECOMPRESS);
460}
461
462void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
463					 struct radv_image *image,
464					 VkImageSubresourceRange *subresourceRange)
465{
466	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
467	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_RESUMMARIZE);
468}
469