1#include "radv_meta.h"
2#include "nir/nir_builder.h"
3
4#include "sid.h"
5#include "radv_cs.h"
6
/* Builds the NIR compute shader used by the buffer-fill meta operation.
 *
 * Interface: one storage buffer at set 0 / binding 0 (the destination) and
 * 4 bytes of push constants holding the 32-bit fill value.  Each invocation
 * writes one 16-byte (vec4) chunk at byte offset global_id.x * 16, so a
 * 64-wide workgroup covers 1024 bytes of the destination.
 */
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
	b.shader->info.cs.local_size[0] = 64;
	b.shader->info.cs.local_size[1] = 1;
	b.shader->info.cs.local_size[2] = 1;

	/* global_id = workgroup_id * workgroup_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Byte offset of this invocation's vec4 chunk; only the x channel is
	 * meaningful since dispatches of this shader are 1-dimensional. */
	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_channel(&b, offset, 0);

	/* Destination SSBO: descriptor set 0, binding 0. */
	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
	                                                          nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	dst_buf->num_components = 1;
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, dst_buf->num_components, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	/* Fill value: the first (and only) 4 bytes of push constants. */
	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(load, 0);
	nir_intrinsic_set_range(load, 4);
	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	load->num_components = 1;
	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
	nir_builder_instr_insert(&b, &load->instr);

	/* Replicate the scalar fill value into all four vec4 channels. */
	nir_ssa_def *swizzled_load = nir_swizzle(&b, &load->dest.ssa, (unsigned[]) { 0, 0, 0, 0}, 4, false);

	/* Store the vec4 to the destination; this shader never reads the
	 * buffer, hence ACCESS_NON_READABLE. */
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(swizzled_load);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}
60
/* Builds the NIR compute shader used by the buffer-copy meta operation.
 *
 * Interface: two storage buffers in descriptor set 0 — binding 0 is the
 * destination, binding 1 the source.  Each invocation moves one 16-byte
 * (vec4) chunk at byte offset global_id.x * 16, so a 64-wide workgroup
 * copies 1024 bytes.
 */
static nir_shader *
build_buffer_copy_shader(struct radv_device *dev)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
	b.shader->info.cs.local_size[0] = 64;
	b.shader->info.cs.local_size[1] = 1;
	b.shader->info.cs.local_size[2] = 1;

	/* global_id = workgroup_id * workgroup_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Byte offset of this invocation's chunk, identical for the load from
	 * the source and the store to the destination. */
	nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
	offset = nir_channel(&b, offset, 0);

	/* Destination SSBO: descriptor set 0, binding 0. */
	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
	                                                          nir_intrinsic_vulkan_resource_index);
	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	dst_buf->num_components = 1;
	nir_intrinsic_set_desc_set(dst_buf, 0);
	nir_intrinsic_set_binding(dst_buf, 0);
	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, dst_buf->num_components, 32, NULL);
	nir_builder_instr_insert(&b, &dst_buf->instr);

	/* Source SSBO: descriptor set 0, binding 1. */
	nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
	                                                          nir_intrinsic_vulkan_resource_index);
	src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_buf->num_components = 1;
	nir_intrinsic_set_desc_set(src_buf, 0);
	nir_intrinsic_set_binding(src_buf, 1);
	nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, src_buf->num_components, 32, NULL);
	nir_builder_instr_insert(&b, &src_buf->instr);

	/* Load one vec4 from the source buffer. */
	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
	load->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
	load->src[1] = nir_src_for_ssa(offset);
	nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
	load->num_components = 4;
	nir_builder_instr_insert(&b, &load->instr);

	/* Store the vec4 to the destination; the destination is write-only
	 * for this shader, hence ACCESS_NON_READABLE. */
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
	store->src[0] = nir_src_for_ssa(&load->dest.ssa);
	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
	store->src[2] = nir_src_for_ssa(offset);
	nir_intrinsic_set_write_mask(store, 0xf);
	nir_intrinsic_set_access(store, ACCESS_NON_READABLE);
	store->num_components = 4;
	nir_builder_instr_insert(&b, &store->instr);

	return b.shader;
}
120
121
122
123VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
124{
125	VkResult result;
126	struct radv_shader_module fill_cs = { .nir = NULL };
127	struct radv_shader_module copy_cs = { .nir = NULL };
128
129	fill_cs.nir = build_buffer_fill_shader(device);
130	copy_cs.nir = build_buffer_copy_shader(device);
131
132	VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
133		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
134		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
135		.bindingCount = 1,
136		.pBindings = (VkDescriptorSetLayoutBinding[]) {
137			{
138				.binding = 0,
139				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
140				.descriptorCount = 1,
141				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
142				.pImmutableSamplers = NULL
143			},
144		}
145	};
146
147	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
148						&fill_ds_create_info,
149						&device->meta_state.alloc,
150						&device->meta_state.buffer.fill_ds_layout);
151	if (result != VK_SUCCESS)
152		goto fail;
153
154	VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
155		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
156		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
157		.bindingCount = 2,
158		.pBindings = (VkDescriptorSetLayoutBinding[]) {
159			{
160				.binding = 0,
161				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
162				.descriptorCount = 1,
163				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
164				.pImmutableSamplers = NULL
165			},
166			{
167				.binding = 1,
168				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
169				.descriptorCount = 1,
170				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
171				.pImmutableSamplers = NULL
172			},
173		}
174	};
175
176	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
177						&copy_ds_create_info,
178						&device->meta_state.alloc,
179						&device->meta_state.buffer.copy_ds_layout);
180	if (result != VK_SUCCESS)
181		goto fail;
182
183
184	VkPipelineLayoutCreateInfo fill_pl_create_info = {
185		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
186		.setLayoutCount = 1,
187		.pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
188		.pushConstantRangeCount = 1,
189		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
190	};
191
192	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
193					  &fill_pl_create_info,
194					  &device->meta_state.alloc,
195					  &device->meta_state.buffer.fill_p_layout);
196	if (result != VK_SUCCESS)
197		goto fail;
198
199	VkPipelineLayoutCreateInfo copy_pl_create_info = {
200		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
201		.setLayoutCount = 1,
202		.pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
203		.pushConstantRangeCount = 0,
204	};
205
206	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
207					  &copy_pl_create_info,
208					  &device->meta_state.alloc,
209					  &device->meta_state.buffer.copy_p_layout);
210	if (result != VK_SUCCESS)
211		goto fail;
212
213	VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
214		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
215		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
216		.module = radv_shader_module_to_handle(&fill_cs),
217		.pName = "main",
218		.pSpecializationInfo = NULL,
219	};
220
221	VkComputePipelineCreateInfo fill_vk_pipeline_info = {
222		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
223		.stage = fill_pipeline_shader_stage,
224		.flags = 0,
225		.layout = device->meta_state.buffer.fill_p_layout,
226	};
227
228	result = radv_CreateComputePipelines(radv_device_to_handle(device),
229					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
230					     1, &fill_vk_pipeline_info, NULL,
231					     &device->meta_state.buffer.fill_pipeline);
232	if (result != VK_SUCCESS)
233		goto fail;
234
235	VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
236		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
237		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
238		.module = radv_shader_module_to_handle(&copy_cs),
239		.pName = "main",
240		.pSpecializationInfo = NULL,
241	};
242
243	VkComputePipelineCreateInfo copy_vk_pipeline_info = {
244		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
245		.stage = copy_pipeline_shader_stage,
246		.flags = 0,
247		.layout = device->meta_state.buffer.copy_p_layout,
248	};
249
250	result = radv_CreateComputePipelines(radv_device_to_handle(device),
251					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
252					     1, &copy_vk_pipeline_info, NULL,
253					     &device->meta_state.buffer.copy_pipeline);
254	if (result != VK_SUCCESS)
255		goto fail;
256
257	ralloc_free(fill_cs.nir);
258	ralloc_free(copy_cs.nir);
259	return VK_SUCCESS;
260fail:
261	radv_device_finish_meta_buffer_state(device);
262	ralloc_free(fill_cs.nir);
263	ralloc_free(copy_cs.nir);
264	return result;
265}
266
267void radv_device_finish_meta_buffer_state(struct radv_device *device)
268{
269	struct radv_meta_state *state = &device->meta_state;
270
271	radv_DestroyPipeline(radv_device_to_handle(device),
272			     state->buffer.copy_pipeline, &state->alloc);
273	radv_DestroyPipeline(radv_device_to_handle(device),
274			     state->buffer.fill_pipeline, &state->alloc);
275	radv_DestroyPipelineLayout(radv_device_to_handle(device),
276				   state->buffer.copy_p_layout, &state->alloc);
277	radv_DestroyPipelineLayout(radv_device_to_handle(device),
278				   state->buffer.fill_p_layout, &state->alloc);
279	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
280					state->buffer.copy_ds_layout,
281					&state->alloc);
282	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
283					state->buffer.fill_ds_layout,
284					&state->alloc);
285}
286
287static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
288			       struct radeon_winsys_bo *bo,
289			       uint64_t offset, uint64_t size, uint32_t value)
290{
291	struct radv_device *device = cmd_buffer->device;
292	uint64_t block_count = round_up_u64(size, 1024);
293	struct radv_meta_saved_state saved_state;
294
295	radv_meta_save(&saved_state, cmd_buffer,
296		       RADV_META_SAVE_COMPUTE_PIPELINE |
297		       RADV_META_SAVE_CONSTANTS |
298		       RADV_META_SAVE_DESCRIPTORS);
299
300	struct radv_buffer dst_buffer = {
301		.bo = bo,
302		.offset = offset,
303		.size = size
304	};
305
306	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
307			     VK_PIPELINE_BIND_POINT_COMPUTE,
308			     device->meta_state.buffer.fill_pipeline);
309
310	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
311			              device->meta_state.buffer.fill_p_layout,
312				      0, /* set */
313				      1, /* descriptorWriteCount */
314				      (VkWriteDescriptorSet[]) {
315				              {
316				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
317				                      .dstBinding = 0,
318				                      .dstArrayElement = 0,
319				                      .descriptorCount = 1,
320				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
321				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
322				                              .buffer = radv_buffer_to_handle(&dst_buffer),
323				                              .offset = 0,
324				                              .range = size
325				                      }
326				              }
327				      });
328
329	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
330			      device->meta_state.buffer.fill_p_layout,
331			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
332			      &value);
333
334	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
335
336	radv_meta_restore(&saved_state, cmd_buffer);
337}
338
339static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
340			       struct radeon_winsys_bo *src_bo,
341			       struct radeon_winsys_bo *dst_bo,
342			       uint64_t src_offset, uint64_t dst_offset,
343			       uint64_t size)
344{
345	struct radv_device *device = cmd_buffer->device;
346	uint64_t block_count = round_up_u64(size, 1024);
347	struct radv_meta_saved_state saved_state;
348
349	radv_meta_save(&saved_state, cmd_buffer,
350		       RADV_META_SAVE_COMPUTE_PIPELINE |
351		       RADV_META_SAVE_DESCRIPTORS);
352
353	struct radv_buffer dst_buffer = {
354		.bo = dst_bo,
355		.offset = dst_offset,
356		.size = size
357	};
358
359	struct radv_buffer src_buffer = {
360		.bo = src_bo,
361		.offset = src_offset,
362		.size = size
363	};
364
365	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
366			     VK_PIPELINE_BIND_POINT_COMPUTE,
367			     device->meta_state.buffer.copy_pipeline);
368
369	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
370			              device->meta_state.buffer.copy_p_layout,
371				      0, /* set */
372				      2, /* descriptorWriteCount */
373				      (VkWriteDescriptorSet[]) {
374				              {
375				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
376				                      .dstBinding = 0,
377				                      .dstArrayElement = 0,
378				                      .descriptorCount = 1,
379				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
380				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
381				                              .buffer = radv_buffer_to_handle(&dst_buffer),
382				                              .offset = 0,
383				                              .range = size
384				                      }
385				              },
386				              {
387				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
388				                      .dstBinding = 1,
389				                      .dstArrayElement = 0,
390				                      .descriptorCount = 1,
391				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
392				                      .pBufferInfo = &(VkDescriptorBufferInfo) {
393				                              .buffer = radv_buffer_to_handle(&src_buffer),
394				                              .offset = 0,
395				                              .range = size
396				                      }
397				              }
398				      });
399
400	radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
401
402	radv_meta_restore(&saved_state, cmd_buffer);
403}
404
405
406uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
407		      struct radeon_winsys_bo *bo,
408		      uint64_t offset, uint64_t size, uint32_t value)
409{
410	uint32_t flush_bits = 0;
411
412	assert(!(offset & 3));
413	assert(!(size & 3));
414
415	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
416		fill_buffer_shader(cmd_buffer, bo, offset, size, value);
417		flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
418			     RADV_CMD_FLAG_INV_VMEM_L1 |
419			     RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
420	} else if (size) {
421		uint64_t va = radv_buffer_get_va(bo);
422		va += offset;
423		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
424		si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
425	}
426
427	return flush_bits;
428}
429
430static
431void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
432		      struct radeon_winsys_bo *src_bo,
433		      struct radeon_winsys_bo *dst_bo,
434		      uint64_t src_offset, uint64_t dst_offset,
435		      uint64_t size)
436{
437	if (size >= RADV_BUFFER_OPS_CS_THRESHOLD && !(size & 3) && !(src_offset & 3) && !(dst_offset & 3))
438		copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
439				   src_offset, dst_offset, size);
440	else if (size) {
441		uint64_t src_va = radv_buffer_get_va(src_bo);
442		uint64_t dst_va = radv_buffer_get_va(dst_bo);
443		src_va += src_offset;
444		dst_va += dst_offset;
445
446		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
447		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);
448
449		si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
450	}
451}
452
453void radv_CmdFillBuffer(
454    VkCommandBuffer                             commandBuffer,
455    VkBuffer                                    dstBuffer,
456    VkDeviceSize                                dstOffset,
457    VkDeviceSize                                fillSize,
458    uint32_t                                    data)
459{
460	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
461	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
462
463	if (fillSize == VK_WHOLE_SIZE)
464		fillSize = (dst_buffer->size - dstOffset) & ~3ull;
465
466	radv_fill_buffer(cmd_buffer, dst_buffer->bo, dst_buffer->offset + dstOffset,
467			 fillSize, data);
468}
469
470void radv_CmdCopyBuffer(
471	VkCommandBuffer                             commandBuffer,
472	VkBuffer                                    srcBuffer,
473	VkBuffer                                    destBuffer,
474	uint32_t                                    regionCount,
475	const VkBufferCopy*                         pRegions)
476{
477	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
478	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);
479	RADV_FROM_HANDLE(radv_buffer, dest_buffer, destBuffer);
480	bool old_predicating;
481
482	/* VK_EXT_conditional_rendering says that copy commands should not be
483	 * affected by conditional rendering.
484	 */
485	old_predicating = cmd_buffer->state.predicating;
486	cmd_buffer->state.predicating = false;
487
488	for (unsigned r = 0; r < regionCount; r++) {
489		uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
490		uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
491		uint64_t copy_size = pRegions[r].size;
492
493		radv_copy_buffer(cmd_buffer, src_buffer->bo, dest_buffer->bo,
494				 src_offset, dest_offset, copy_size);
495	}
496
497	/* Restore conditional rendering. */
498	cmd_buffer->state.predicating = old_predicating;
499}
500
/* vkCmdUpdateBuffer entry point.  Small updates are embedded directly in
 * the command stream with a WRITE_DATA packet; larger updates are staged
 * through the command buffer's upload BO and copied to the destination
 * with radv_copy_buffer(). */
void radv_CmdUpdateBuffer(
	VkCommandBuffer                             commandBuffer,
	VkBuffer                                    dstBuffer,
	VkDeviceSize                                dstOffset,
	VkDeviceSize                                dataSize,
	const void*                                 pData)
{
	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
	/* MEC (the compute micro-engine) needs a different DST_SEL encoding
	 * in the WRITE_DATA packet below. */
	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
	uint64_t words = dataSize / 4;
	uint64_t va = radv_buffer_get_va(dst_buffer->bo);
	va += dstOffset + dst_buffer->offset;

	/* The Vulkan spec requires dataSize and dstOffset to be multiples
	 * of 4, so the payload is a whole number of dwords. */
	assert(!(dataSize & 3));
	assert(!(va & 3));

	if (!dataSize)
		return;

	if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
		si_emit_cache_flush(cmd_buffer);

		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);

		/* 4 dwords of packet header + address, then the payload. */
		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
		                                V_370_MEM : V_370_MEM_GRBM) |
		                            S_370_WR_CONFIRM(1) |
		                            S_370_ENGINE_SEL(V_370_ME));
		radeon_emit(cmd_buffer->cs, va);
		radeon_emit(cmd_buffer->cs, va >> 32);
		radeon_emit_array(cmd_buffer->cs, pData, words);

		if (unlikely(cmd_buffer->device->trace_bo))
			radv_cmd_buffer_trace_emit(cmd_buffer);
	} else {
		/* Stage the data in the upload BO, then copy it to the
		 * destination (compute shader or CP DMA depending on size). */
		uint32_t buf_offset;
		radv_cmd_buffer_upload_data(cmd_buffer, dataSize, 32, pData, &buf_offset);
		radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
				 buf_offset, dstOffset + dst_buffer->offset, dataSize);
	}
}
546