1b8e80941Smrg#include "radv_meta.h"
2b8e80941Smrg#include "nir/nir_builder.h"
3b8e80941Smrg
4b8e80941Smrg#include "sid.h"
5b8e80941Smrg#include "radv_cs.h"
6b8e80941Smrg
7b8e80941Smrgstatic nir_shader *
8b8e80941Smrgbuild_buffer_fill_shader(struct radv_device *dev)
9b8e80941Smrg{
10b8e80941Smrg	nir_builder b;
11b8e80941Smrg
12b8e80941Smrg	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
13b8e80941Smrg	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
14b8e80941Smrg	b.shader->info.cs.local_size[0] = 64;
15b8e80941Smrg	b.shader->info.cs.local_size[1] = 1;
16b8e80941Smrg	b.shader->info.cs.local_size[2] = 1;
17b8e80941Smrg
18b8e80941Smrg	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
19b8e80941Smrg	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
20b8e80941Smrg	nir_ssa_def *block_size = nir_imm_ivec4(&b,
21b8e80941Smrg						b.shader->info.cs.local_size[0],
22b8e80941Smrg						b.shader->info.cs.local_size[1],
23b8e80941Smrg						b.shader->info.cs.local_size[2], 0);
24b8e80941Smrg
25b8e80941Smrg	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
26b8e80941Smrg
27b8e80941Smrg	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
28b8e80941Smrg	offset = nir_channel(&b, offset, 0);
29b8e80941Smrg
30b8e80941Smrg	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
31b8e80941Smrg	                                                          nir_intrinsic_vulkan_resource_index);
32b8e80941Smrg	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
33b8e80941Smrg	dst_buf->num_components = 1;
34b8e80941Smrg	nir_intrinsic_set_desc_set(dst_buf, 0);
35b8e80941Smrg	nir_intrinsic_set_binding(dst_buf, 0);
36b8e80941Smrg	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, dst_buf->num_components, 32, NULL);
37b8e80941Smrg	nir_builder_instr_insert(&b, &dst_buf->instr);
38b8e80941Smrg
39b8e80941Smrg	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
40b8e80941Smrg	nir_intrinsic_set_base(load, 0);
41b8e80941Smrg	nir_intrinsic_set_range(load, 4);
42b8e80941Smrg	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
43b8e80941Smrg	load->num_components = 1;
44b8e80941Smrg	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
45b8e80941Smrg	nir_builder_instr_insert(&b, &load->instr);
46b8e80941Smrg
47b8e80941Smrg	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4, false);
48b8e80941Smrg
49b8e80941Smrg	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
50b8e80941Smrg	store->src[0] = nir_src_for_ssa(swizzled_load);
51b8e80941Smrg	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
52b8e80941Smrg	store->src[2] = nir_src_for_ssa(offset);
53b8e80941Smrg	nir_intrinsic_set_write_mask(store, 0xf);
54b8e80941Smrg	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
55b8e80941Smrg	store->num_components = 4;
56b8e80941Smrg	nir_builder_instr_insert(&b, &store->instr);
57b8e80941Smrg
58b8e80941Smrg	return b.shader;
59b8e80941Smrg}
60b8e80941Smrg
61b8e80941Smrgstatic nir_shader *
62b8e80941Smrgbuild_buffer_copy_shader(struct radv_device *dev)
63b8e80941Smrg{
64b8e80941Smrg	nir_builder b;
65b8e80941Smrg
66b8e80941Smrg	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
67b8e80941Smrg	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
68b8e80941Smrg	b.shader->info.cs.local_size[0] = 64;
69b8e80941Smrg	b.shader->info.cs.local_size[1] = 1;
70b8e80941Smrg	b.shader->info.cs.local_size[2] = 1;
71b8e80941Smrg
72b8e80941Smrg	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
73b8e80941Smrg	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
74b8e80941Smrg	nir_ssa_def *block_size = nir_imm_ivec4(&b,
75b8e80941Smrg						b.shader->info.cs.local_size[0],
76b8e80941Smrg						b.shader->info.cs.local_size[1],
77b8e80941Smrg						b.shader->info.cs.local_size[2], 0);
78b8e80941Smrg
79b8e80941Smrg	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
80b8e80941Smrg
81b8e80941Smrg	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
82b8e80941Smrg	offset = nir_channel(&b, offset, 0);
83b8e80941Smrg
84b8e80941Smrg	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
85b8e80941Smrg	                                                          nir_intrinsic_vulkan_resource_index);
86b8e80941Smrg	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
87b8e80941Smrg	dst_buf->num_components = 1;
88b8e80941Smrg	nir_intrinsic_set_desc_set(dst_buf, 0);
89b8e80941Smrg	nir_intrinsic_set_binding(dst_buf, 0);
90b8e80941Smrg	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, dst_buf->num_components, 32, NULL);
91b8e80941Smrg	nir_builder_instr_insert(&b, &dst_buf->instr);
92b8e80941Smrg
93b8e80941Smrg	nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
94b8e80941Smrg	                                                          nir_intrinsic_vulkan_resource_index);
95b8e80941Smrg	src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
96b8e80941Smrg	src_buf->num_components = 1;
97b8e80941Smrg	nir_intrinsic_set_desc_set(src_buf, 0);
98b8e80941Smrg	nir_intrinsic_set_binding(src_buf, 1);
99b8e80941Smrg	nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, src_buf->num_components, 32, NULL);
100b8e80941Smrg	nir_builder_instr_insert(&b, &src_buf->instr);
101b8e80941Smrg
102b8e80941Smrg	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
103b8e80941Smrg	load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
104b8e80941Smrg	load->src[1] = nir_src_for_ssa(offset);
105b8e80941Smrg	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
106b8e80941Smrg	load->num_components = 4;
107b8e80941Smrg	nir_builder_instr_insert(&b, &load->instr);
108b8e80941Smrg
109b8e80941Smrg	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
110b8e80941Smrg	store->src[0] = nir_src_for_ssa(&load->dest.ssa);
111b8e80941Smrg	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
112b8e80941Smrg	store->src[2] = nir_src_for_ssa(offset);
113b8e80941Smrg	nir_intrinsic_set_write_mask(store, 0xf);
114b8e80941Smrg	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
115b8e80941Smrg	store->num_components = 4;
116b8e80941Smrg	nir_builder_instr_insert(&b, &store->instr);
117b8e80941Smrg
118b8e80941Smrg	return b.shader;
119b8e80941Smrg}
120b8e80941Smrg
121b8e80941Smrg
122b8e80941Smrg
123b8e80941SmrgVkResult radv_device_init_meta_buffer_state(struct radv_device *device)
124b8e80941Smrg{
125b8e80941Smrg	VkResult result;
126b8e80941Smrg	struct radv_shader_module fill_cs = { .nir = NULL };
127b8e80941Smrg	struct radv_shader_module copy_cs = { .nir = NULL };
128b8e80941Smrg
129b8e80941Smrg	fill_cs.nir = build_buffer_fill_shader(device);
130b8e80941Smrg	copy_cs.nir = build_buffer_copy_shader(device);
131b8e80941Smrg
132b8e80941Smrg	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
133b8e80941Smrg		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
134b8e80941Smrg		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
135b8e80941Smrg		.bindingCount = 1,
136b8e80941Smrg		.pBindings = (VkDescriptorSetLayoutBinding[]) {
137b8e80941Smrg			{
138b8e80941Smrg				.binding = 0,
139b8e80941Smrg				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
140b8e80941Smrg				.descriptorCount = 1,
141b8e80941Smrg				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
142b8e80941Smrg				.pImmutableSamplers = NULL
143b8e80941Smrg			},
144b8e80941Smrg		}
145b8e80941Smrg	};
146b8e80941Smrg
147b8e80941Smrg	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
148b8e80941Smrg						&fill_ds_create_info,
149b8e80941Smrg						&device->meta_state.alloc,
150b8e80941Smrg						&device->meta_state.buffer.fill_ds_layout);
151b8e80941Smrg	if (result != VK_SUCCESS)
152b8e80941Smrg		goto fail;
153b8e80941Smrg
154b8e80941Smrg	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
155b8e80941Smrg		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
156b8e80941Smrg		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
157b8e80941Smrg		.bindingCount = 2,
158b8e80941Smrg		.pBindings = (VkDescriptorSetLayoutBinding[]) {
159b8e80941Smrg			{
160b8e80941Smrg				.binding = 0,
161b8e80941Smrg				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
162b8e80941Smrg				.descriptorCount = 1,
163b8e80941Smrg				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
164b8e80941Smrg				.pImmutableSamplers = NULL
165b8e80941Smrg			},
166b8e80941Smrg			{
167b8e80941Smrg				.binding = 1,
168b8e80941Smrg				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
169b8e80941Smrg				.descriptorCount = 1,
170b8e80941Smrg				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
171b8e80941Smrg				.pImmutableSamplers = NULL
172b8e80941Smrg			},
173b8e80941Smrg		}
174b8e80941Smrg	};
175b8e80941Smrg
176b8e80941Smrg	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
177b8e80941Smrg						&copy_ds_create_info,
178b8e80941Smrg						&device->meta_state.alloc,
179b8e80941Smrg						&device->meta_state.buffer.copy_ds_layout);
180b8e80941Smrg	if (result != VK_SUCCESS)
181b8e80941Smrg		goto fail;
182b8e80941Smrg
183b8e80941Smrg
184b8e80941Smrg	VkPipelineLayoutCreateInfo fill_pl_create_info = {
185b8e80941Smrg		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
186b8e80941Smrg		.setLayoutCount = 1,
187b8e80941Smrg		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
188b8e80941Smrg		.pushConstantRangeCount = 1,
189b8e80941Smrg		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
190b8e80941Smrg	};
191b8e80941Smrg
192b8e80941Smrg	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
193b8e80941Smrg					  &fill_pl_create_info,
194b8e80941Smrg					  &device->meta_state.alloc,
195b8e80941Smrg					  &device->meta_state.buffer.fill_p_layout);
196b8e80941Smrg	if (result != VK_SUCCESS)
197b8e80941Smrg		goto fail;
198b8e80941Smrg
199b8e80941Smrg	VkPipelineLayoutCreateInfo copy_pl_create_info = {
200b8e80941Smrg		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
201b8e80941Smrg		.setLayoutCount = 1,
202b8e80941Smrg		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
203b8e80941Smrg		.pushConstantRangeCount = 0,
204b8e80941Smrg	};
205b8e80941Smrg
206b8e80941Smrg	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
207b8e80941Smrg					  &copy_pl_create_info,
208b8e80941Smrg					  &device->meta_state.alloc,
209b8e80941Smrg					  &device->meta_state.buffer.copy_p_layout);
210b8e80941Smrg	if (result != VK_SUCCESS)
211b8e80941Smrg		goto fail;
212b8e80941Smrg
213b8e80941Smrg	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
214b8e80941Smrg		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
215b8e80941Smrg		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
216b8e80941Smrg		.module = radv_shader_module_to_handle(&fill_cs),
217b8e80941Smrg		.pName = "main",
218b8e80941Smrg		.pSpecializationInfo = NULL,
219b8e80941Smrg	};
220b8e80941Smrg
221b8e80941Smrg	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
222b8e80941Smrg		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
223b8e80941Smrg		.stage = fill_pipeline_shader_stage,
224b8e80941Smrg		.flags = 0,
225b8e80941Smrg		.layout = device->meta_state.buffer.fill_p_layout,
226b8e80941Smrg	};
227b8e80941Smrg
228b8e80941Smrg	result = radv_CreateComputePipelines(radv_device_to_handle(device),
229b8e80941Smrg					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
230b8e80941Smrg					     1, &fill_vk_pipeline_info, NULL,
231b8e80941Smrg					     &device->meta_state.buffer.fill_pipeline);
232b8e80941Smrg	if (result != VK_SUCCESS)
233b8e80941Smrg		goto fail;
234b8e80941Smrg
235b8e80941Smrg	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
236b8e80941Smrg		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
237b8e80941Smrg		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
238b8e80941Smrg		.module = radv_shader_module_to_handle(&copy_cs),
239b8e80941Smrg		.pName = "main",
240b8e80941Smrg		.pSpecializationInfo = NULL,
241b8e80941Smrg	};
242b8e80941Smrg
243b8e80941Smrg	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
244b8e80941Smrg		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
245b8e80941Smrg		.stage = copy_pipeline_shader_stage,
246b8e80941Smrg		.flags = 0,
247b8e80941Smrg		.layout = device->meta_state.buffer.copy_p_layout,
248b8e80941Smrg	};
249b8e80941Smrg
250b8e80941Smrg	result = radv_CreateComputePipelines(radv_device_to_handle(device),
251b8e80941Smrg					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
252b8e80941Smrg					     1, &copy_vk_pipeline_info, NULL,
253b8e80941Smrg					     &device->meta_state.buffer.copy_pipeline);
254b8e80941Smrg	if (result != VK_SUCCESS)
255b8e80941Smrg		goto fail;
256b8e80941Smrg
257b8e80941Smrg	ralloc_free(fill_cs.nir);
258b8e80941Smrg	ralloc_free(copy_cs.nir);
259b8e80941Smrg	return VK_SUCCESS;
260b8e80941Smrgfail:
261b8e80941Smrg	radv_device_finish_meta_buffer_state(device);
262b8e80941Smrg	ralloc_free(fill_cs.nir);
263b8e80941Smrg	ralloc_free(copy_cs.nir);
264b8e80941Smrg	return result;
265b8e80941Smrg}
266b8e80941Smrg
267b8e80941Smrgvoid radv_device_finish_meta_buffer_state(struct radv_device *device)
268b8e80941Smrg{
269b8e80941Smrg	struct radv_meta_state *state = &device->meta_state;
270b8e80941Smrg
271b8e80941Smrg	radv_DestroyPipeline(radv_device_to_handle(device),
272b8e80941Smrg			     state->buffer.copy_pipeline, &state->alloc);
273b8e80941Smrg	radv_DestroyPipeline(radv_device_to_handle(device),
274b8e80941Smrg			     state->buffer.fill_pipeline, &state->alloc);
275b8e80941Smrg	radv_DestroyPipelineLayout(radv_device_to_handle(device),
276b8e80941Smrg				   state->buffer.copy_p_layout, &state->alloc);
277b8e80941Smrg	radv_DestroyPipelineLayout(radv_device_to_handle(device),
278b8e80941Smrg				   state->buffer.fill_p_layout, &state->alloc);
279b8e80941Smrg	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
280b8e80941Smrg					state->buffer.copy_ds_layout,
281b8e80941Smrg					&state->alloc);
282b8e80941Smrg	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
283b8e80941Smrg					state->buffer.fill_ds_layout,
284b8e80941Smrg					&state->alloc);
285b8e80941Smrg}
286b8e80941Smrg
287b8e80941Smrgstatic void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
288b8e80941Smrg			       struct radeon_winsys_bo *bo,
289b8e80941Smrg			       uint64_t offset, uint64_t size, uint32_t value)
290b8e80941Smrg{
291b8e80941Smrg	struct radv_device *device = cmd_buffer->device;
292b8e80941Smrg	uint64_t block_count = round_up_u64(size, 1024);
293b8e80941Smrg	struct radv_meta_saved_state saved_state;
294b8e80941Smrg
295b8e80941Smrg	radv_meta_save(&saved_state, cmd_buffer,
296b8e80941Smrg		       RADV_META_SAVE_COMPUTE_PIPELINE |
297b8e80941Smrg		       RADV_META_SAVE_CONSTANTS |
298b8e80941Smrg		       RADV_META_SAVE_DESCRIPTORS);
299b8e80941Smrg
300b8e80941Smrg	struct radv_buffer dst_buffer = {
301b8e80941Smrg		.bo = bo,
302b8e80941Smrg		.offset = offset,
303b8e80941Smrg		.size = size
304b8e80941Smrg	};
305b8e80941Smrg
306b8e80941Smrg	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
307b8e80941Smrg			     VK_PIPELINE_BIND_POINT_COMPUTE,
308b8e80941Smrg			     device->meta_state.buffer.fill_pipeline);
309b8e80941Smrg
310b8e80941Smrg	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
311b8e80941Smrg			              device->meta_state.buffer.fill_p_layout,
312b8e80941Smrg				      0, /* set */
313b8e80941Smrg				      1, /* descriptorWriteCount */
314b8e80941Smrg				      (VkWriteDescriptorSet[]) {
315b8e80941Smrg				              {
316b8e80941Smrg				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
317b8e80941Smrg				                      .dstBinding = 0,
318b8e80941Smrg				                      .dstArrayElement = 0,
319b8e80941Smrg				                      .descriptorCount = 1,
320b8e80941Smrg				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
321b8e80941Smrg				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
322b8e80941Smrg				                              .buffer = radv_buffer_to_handle(&dst_buffer),
323b8e80941Smrg				                              .offset = 0,
324b8e80941Smrg				                              .range = size
325b8e80941Smrg				                      }
326b8e80941Smrg				              }
327b8e80941Smrg				      });
328b8e80941Smrg
329b8e80941Smrg	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
330b8e80941Smrg			      device->meta_state.buffer.fill_p_layout,
331b8e80941Smrg			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
332b8e80941Smrg			      &value);
333b8e80941Smrg
334b8e80941Smrg	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
335b8e80941Smrg
336b8e80941Smrg	radv_meta_restore(&saved_state, cmd_buffer);
337b8e80941Smrg}
338b8e80941Smrg
339b8e80941Smrgstatic void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
340b8e80941Smrg			       struct radeon_winsys_bo *src_bo,
341b8e80941Smrg			       struct radeon_winsys_bo *dst_bo,
342b8e80941Smrg			       uint64_t src_offset, uint64_t dst_offset,
343b8e80941Smrg			       uint64_t size)
344b8e80941Smrg{
345b8e80941Smrg	struct radv_device *device = cmd_buffer->device;
346b8e80941Smrg	uint64_t block_count = round_up_u64(size, 1024);
347b8e80941Smrg	struct radv_meta_saved_state saved_state;
348b8e80941Smrg
349b8e80941Smrg	radv_meta_save(&saved_state, cmd_buffer,
350b8e80941Smrg		       RADV_META_SAVE_COMPUTE_PIPELINE |
351b8e80941Smrg		       RADV_META_SAVE_DESCRIPTORS);
352b8e80941Smrg
353b8e80941Smrg	struct radv_buffer dst_buffer = {
354b8e80941Smrg		.bo = dst_bo,
355b8e80941Smrg		.offset = dst_offset,
356b8e80941Smrg		.size = size
357b8e80941Smrg	};
358b8e80941Smrg
359b8e80941Smrg	struct radv_buffer src_buffer = {
360b8e80941Smrg		.bo = src_bo,
361b8e80941Smrg		.offset = src_offset,
362b8e80941Smrg		.size = size
363b8e80941Smrg	};
364b8e80941Smrg
365b8e80941Smrg	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
366b8e80941Smrg			     VK_PIPELINE_BIND_POINT_COMPUTE,
367b8e80941Smrg			     device->meta_state.buffer.copy_pipeline);
368b8e80941Smrg
369b8e80941Smrg	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
370b8e80941Smrg			              device->meta_state.buffer.copy_p_layout,
371b8e80941Smrg				      0, /* set */
372b8e80941Smrg				      2, /* descriptorWriteCount */
373b8e80941Smrg				      (VkWriteDescriptorSet[]) {
374b8e80941Smrg				              {
375b8e80941Smrg				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
376b8e80941Smrg				                      .dstBinding = 0,
377b8e80941Smrg				                      .dstArrayElement = 0,
378b8e80941Smrg				                      .descriptorCount = 1,
379b8e80941Smrg				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
380b8e80941Smrg				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
381b8e80941Smrg				                              .buffer = radv_buffer_to_handle(&dst_buffer),
382b8e80941Smrg				                              .offset = 0,
383b8e80941Smrg				                              .range = size
384b8e80941Smrg				                      }
385b8e80941Smrg				              },
386b8e80941Smrg				              {
387b8e80941Smrg				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
388b8e80941Smrg				                      .dstBinding = 1,
389b8e80941Smrg				                      .dstArrayElement = 0,
390b8e80941Smrg				                      .descriptorCount = 1,
391b8e80941Smrg				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
392b8e80941Smrg				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
393b8e80941Smrg				                              .buffer = radv_buffer_to_handle(&src_buffer),
394b8e80941Smrg				                              .offset = 0,
395b8e80941Smrg				                              .range = size
396b8e80941Smrg				                      }
397b8e80941Smrg				              }
398b8e80941Smrg				      });
399b8e80941Smrg
400b8e80941Smrg	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
401b8e80941Smrg
402b8e80941Smrg	radv_meta_restore(&saved_state, cmd_buffer);
403b8e80941Smrg}
404b8e80941Smrg
405b8e80941Smrg
406b8e80941Smrguint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
407b8e80941Smrg		      struct radeon_winsys_bo *bo,
408b8e80941Smrg		      uint64_t offset, uint64_t size, uint32_t value)
409b8e80941Smrg{
410b8e80941Smrg	uint32_t flush_bits = 0;
411b8e80941Smrg
412b8e80941Smrg	assert(!(offset & 3));
413b8e80941Smrg	assert(!(size & 3));
414b8e80941Smrg
415b8e80941Smrg	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
416b8e80941Smrg		fill_buffer_shader(cmd_buffer, bo, offset, size, value);
417b8e80941Smrg		flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
418b8e80941Smrg			     RADV_CMD_FLAG_INV_VMEM_L1 |
419b8e80941Smrg			     RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
420b8e80941Smrg	} else if (size) {
421b8e80941Smrg		uint64_t va = radv_buffer_get_va(bo);
422b8e80941Smrg		va += offset;
423b8e80941Smrg		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
424b8e80941Smrg		si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
425b8e80941Smrg	}
426b8e80941Smrg
427b8e80941Smrg	return flush_bits;
428b8e80941Smrg}
429b8e80941Smrg
430b8e80941Smrgstatic
431b8e80941Smrgvoid radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
432b8e80941Smrg		      struct radeon_winsys_bo *src_bo,
433b8e80941Smrg		      struct radeon_winsys_bo *dst_bo,
434b8e80941Smrg		      uint64_t src_offset, uint64_t dst_offset,
435b8e80941Smrg		      uint64_t size)
436b8e80941Smrg{
437b8e80941Smrg	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
438b8e80941Smrg		copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
439b8e80941Smrg				   src_offset, dst_offset, size);
440b8e80941Smrg	else if (size) {
441b8e80941Smrg		uint64_t src_va = radv_buffer_get_va(src_bo);
442b8e80941Smrg		uint64_t dst_va = radv_buffer_get_va(dst_bo);
443b8e80941Smrg		src_va += src_offset;
444b8e80941Smrg		dst_va += dst_offset;
445b8e80941Smrg
446b8e80941Smrg		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
447b8e80941Smrg		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);
448b8e80941Smrg
449b8e80941Smrg		si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
450b8e80941Smrg	}
451b8e80941Smrg}
452b8e80941Smrg
453b8e80941Smrgvoid radv_CmdFillBuffer(
454b8e80941Smrg    VkCommandBuffer                             commandBuffer,
455b8e80941Smrg    VkBuffer                                    dstBuffer,
456b8e80941Smrg    VkDeviceSize                                dstOffset,
457b8e80941Smrg    VkDeviceSize                                fillSize,
458b8e80941Smrg    uint32_t                                    data)
459b8e80941Smrg{
460b8e80941Smrg	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
461b8e80941Smrg	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
462b8e80941Smrg
463b8e80941Smrg	if (fillSize == VK_WHOLE_SIZE)
464b8e80941Smrg		fillSize = (dst_buffer->size - dstOffset) & ~3ull;
465b8e80941Smrg
466b8e80941Smrg	radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset,
467b8e80941Smrg			 fillSize, data);
468b8e80941Smrg}
469b8e80941Smrg
470b8e80941Smrgvoid radv_CmdCopyBuffer(
471b8e80941Smrg	VkCommandBuffer                             commandBuffer,
472b8e80941Smrg	VkBuffer                                    srcBuffer,
473b8e80941Smrg	VkBuffer                                    destBuffer,
474b8e80941Smrg	uint32_t                                    regionCount,
475b8e80941Smrg	const VkBufferCopy*                         pRegions)
476b8e80941Smrg{
477b8e80941Smrg	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
478b8e80941Smrg	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
479b8e80941Smrg	RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer);
480b8e80941Smrg	bool old_predicating;
481b8e80941Smrg
482b8e80941Smrg	/* VK_EXT_conditional_rendering says that copy commands should not be
483b8e80941Smrg	 * affected by conditional rendering.
484b8e80941Smrg	 */
485b8e80941Smrg	old_predicating = cmd_buffer->state.predicating;
486b8e80941Smrg	cmd_buffer->state.predicating = false;
487b8e80941Smrg
488b8e80941Smrg	for (unsigned r = 0; r < regionCount; r++) {
489b8e80941Smrg		uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
490b8e80941Smrg		uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
491b8e80941Smrg		uint64_t copy_size = pRegions[r].size;
492b8e80941Smrg
493b8e80941Smrg		radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo,
494b8e80941Smrg				 src_offset, dest_offset, copy_size);
495b8e80941Smrg	}
496b8e80941Smrg
497b8e80941Smrg	/* Restore conditional rendering. */
498b8e80941Smrg	cmd_buffer->state.predicating = old_predicating;
499b8e80941Smrg}
500b8e80941Smrg
501b8e80941Smrgvoid radv_CmdUpdateBuffer(
502b8e80941Smrg	VkCommandBuffer                             commandBuffer,
503b8e80941Smrg	VkBuffer                                    dstBuffer,
504b8e80941Smrg	VkDeviceSize                                dstOffset,
505b8e80941Smrg	VkDeviceSize                                dataSize,
506b8e80941Smrg	const void*                                 pData)
507b8e80941Smrg{
508b8e80941Smrg	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
509b8e80941Smrg	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
510b8e80941Smrg	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
511b8e80941Smrg	uint64_t words = dataSize / 4;
512b8e80941Smrg	uint64_t va = radv_buffer_get_va(dst_buffer->bo);
513b8e80941Smrg	va += dstOffset + dst_buffer->offset;
514b8e80941Smrg
515b8e80941Smrg	assert(!(dataSize & 3));
516b8e80941Smrg	assert(!(va & 3));
517b8e80941Smrg
518b8e80941Smrg	if (!dataSize)
519b8e80941Smrg		return;
520b8e80941Smrg
521b8e80941Smrg	if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
522b8e80941Smrg		si_emit_cache_flush(cmd_buffer);
523b8e80941Smrg
524b8e80941Smrg		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
525b8e80941Smrg
526b8e80941Smrg		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
527b8e80941Smrg
528b8e80941Smrg		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
529b8e80941Smrg		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
530b8e80941Smrg		                                V_370_MEM : V_370_MEM_GRBM) |
531b8e80941Smrg		                            S_370_WR_CONFIRM(1) |
532b8e80941Smrg		                            S_370_ENGINE_SEL(V_370_ME));
533b8e80941Smrg		radeon_emit(cmd_buffer->cs, va);
534b8e80941Smrg		radeon_emit(cmd_buffer->cs, va >> 32);
535b8e80941Smrg		radeon_emit_array(cmd_buffer->cs, pData, words);
536b8e80941Smrg
537b8e80941Smrg		if (unlikely(cmd_buffer->device->trace_bo))
538b8e80941Smrg			radv_cmd_buffer_trace_emit(cmd_buffer);
539b8e80941Smrg	} else {
540b8e80941Smrg		uint32_t buf_offset;
541b8e80941Smrg		radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
542b8e80941Smrg		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
543b8e80941Smrg				 buf_offset, dstOffset + dst_buffer->offset, dataSize);
544b8e80941Smrg	}
545b8e80941Smrg}
546