1/*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24#include "radv_meta.h"
25#include "nir/nir_builder.h"
26
27/*
28 * GFX queue: Compute shader implementation of image->buffer copy
29 * Compute queue: implementation also of buffer->image, image->image, and image clear.
30 */
31
32/* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options
33 * for that.
34 */
/*
 * Build the image->buffer copy compute shader.
 *
 * Each invocation fetches one texel from the source image (2D, or 3D when
 * is_3d) at global_id + offset and writes it to the destination storage
 * texel buffer at index y * stride + x.
 *
 * Push constants (16 bytes): texel offset at byte 0 (2 or 3 ints),
 * buffer row stride at byte 12.
 */
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	/* 3D sampler dim only for the GFX9 3D path (see comment above). */
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *sampler_type = glsl_sampler_type(dim,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
	/* 16x16x1 workgroup: one invocation per texel. */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Descriptor set 0: binding 0 = source image, binding 1 = dest buffer. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = workgroup_id * workgroup_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);



	/* Source texel offset: push constants at byte 0. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	/* Destination buffer row stride: push constant at byte 12. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	/* Unfiltered fetch (txf) of the source texel at LOD 0. */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = dim;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = is_3d ? 3 : 2;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Linearize destination index: y * stride + x. */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	/* Store the fetched texel to the buffer image; src[2] (sample index)
	 * is unused for buffer images, hence undef. */
	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(outval);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
128
129/* Image to buffer - don't write use image accessors */
130static VkResult
131radv_device_init_meta_itob_state(struct radv_device *device)
132{
133	VkResult result;
134	struct radv_shader_module cs = { .nir = NULL };
135	struct radv_shader_module cs_3d = { .nir = NULL };
136
137	cs.nir = build_nir_itob_compute_shader(device, false);
138	if (device->physical_device->rad_info.chip_class >= GFX9)
139		cs_3d.nir = build_nir_itob_compute_shader(device, true);
140
141	/*
142	 * two descriptors one for the image being sampled
143	 * one for the buffer being written.
144	 */
145	VkDescriptorSetLayoutCreateInfo ds_create_info = {
146		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
147		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
148		.bindingCount = 2,
149		.pBindings = (VkDescriptorSetLayoutBinding[]) {
150			{
151				.binding = 0,
152				.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
153				.descriptorCount = 1,
154				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
155				.pImmutableSamplers = NULL
156			},
157			{
158				.binding = 1,
159				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
160				.descriptorCount = 1,
161				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
162				.pImmutableSamplers = NULL
163			},
164		}
165	};
166
167	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
168						&ds_create_info,
169						&device->meta_state.alloc,
170						&device->meta_state.itob.img_ds_layout);
171	if (result != VK_SUCCESS)
172		goto fail;
173
174
175	VkPipelineLayoutCreateInfo pl_create_info = {
176		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
177		.setLayoutCount = 1,
178		.pSetLayouts = &device->meta_state.itob.img_ds_layout,
179		.pushConstantRangeCount = 1,
180		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
181	};
182
183	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
184					  &pl_create_info,
185					  &device->meta_state.alloc,
186					  &device->meta_state.itob.img_p_layout);
187	if (result != VK_SUCCESS)
188		goto fail;
189
190	/* compute shader */
191
192	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
193		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
194		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
195		.module = radv_shader_module_to_handle(&cs),
196		.pName = "main",
197		.pSpecializationInfo = NULL,
198	};
199
200	VkComputePipelineCreateInfo vk_pipeline_info = {
201		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
202		.stage = pipeline_shader_stage,
203		.flags = 0,
204		.layout = device->meta_state.itob.img_p_layout,
205	};
206
207	result = radv_CreateComputePipelines(radv_device_to_handle(device),
208					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
209					     1, &vk_pipeline_info, NULL,
210					     &device->meta_state.itob.pipeline);
211	if (result != VK_SUCCESS)
212		goto fail;
213
214	if (device->physical_device->rad_info.chip_class >= GFX9) {
215		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
216			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
217			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
218			.module = radv_shader_module_to_handle(&cs_3d),
219			.pName = "main",
220			.pSpecializationInfo = NULL,
221		};
222
223		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
224			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
225			.stage = pipeline_shader_stage_3d,
226			.flags = 0,
227			.layout = device->meta_state.itob.img_p_layout,
228		};
229
230		result = radv_CreateComputePipelines(radv_device_to_handle(device),
231						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
232						     1, &vk_pipeline_info_3d, NULL,
233						     &device->meta_state.itob.pipeline_3d);
234		if (result != VK_SUCCESS)
235			goto fail;
236		ralloc_free(cs_3d.nir);
237	}
238	ralloc_free(cs.nir);
239
240	return VK_SUCCESS;
241fail:
242	ralloc_free(cs.nir);
243	ralloc_free(cs_3d.nir);
244	return result;
245}
246
247static void
248radv_device_finish_meta_itob_state(struct radv_device *device)
249{
250	struct radv_meta_state *state = &device->meta_state;
251
252	radv_DestroyPipelineLayout(radv_device_to_handle(device),
253				   state->itob.img_p_layout, &state->alloc);
254	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
255				        state->itob.img_ds_layout,
256					&state->alloc);
257	radv_DestroyPipeline(radv_device_to_handle(device),
258			     state->itob.pipeline, &state->alloc);
259	if (device->physical_device->rad_info.chip_class >= GFX9)
260		radv_DestroyPipeline(radv_device_to_handle(device),
261				     state->itob.pipeline_3d, &state->alloc);
262}
263
/*
 * Build the buffer->image copy compute shader.
 *
 * Each invocation fetches one texel from the source texel buffer at
 * index y * stride + x and stores it into the destination storage image
 * (2D, or 3D when is_3d) at global_id + offset.
 *
 * Push constants (16 bytes): image offset at byte 0 (2 or 3 ints),
 * buffer row stride at byte 12.
 */
static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	/* 3D image dim only for the GFX9 3D path (see top of file). */
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
	/* 16x16x1 workgroup: one invocation per texel. */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Descriptor set 0: binding 0 = source buffer, binding 1 = dest image. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = workgroup_id * workgroup_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Destination image offset: push constants at byte 0. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	/* Source buffer row stride: push constant at byte 12. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	/* Linearize source buffer index: y * stride + x. */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	/* Unfiltered fetch (txf) from the texel buffer; only .x of
	 * buf_coord is used as the 1-component coordinate. */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 1;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Store the fetched texel to the destination image; src[2]
	 * (sample index) is unused, hence undef. */
	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(img_coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(outval);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
355
356/* Buffer to image - don't write use image accessors */
357static VkResult
358radv_device_init_meta_btoi_state(struct radv_device *device)
359{
360	VkResult result;
361	struct radv_shader_module cs = { .nir = NULL };
362	struct radv_shader_module cs_3d = { .nir = NULL };
363	cs.nir = build_nir_btoi_compute_shader(device, false);
364	if (device->physical_device->rad_info.chip_class >= GFX9)
365		cs_3d.nir = build_nir_btoi_compute_shader(device, true);
366	/*
367	 * two descriptors one for the image being sampled
368	 * one for the buffer being written.
369	 */
370	VkDescriptorSetLayoutCreateInfo ds_create_info = {
371		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
372		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
373		.bindingCount = 2,
374		.pBindings = (VkDescriptorSetLayoutBinding[]) {
375			{
376				.binding = 0,
377				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
378				.descriptorCount = 1,
379				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
380				.pImmutableSamplers = NULL
381			},
382			{
383				.binding = 1,
384				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
385				.descriptorCount = 1,
386				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
387				.pImmutableSamplers = NULL
388			},
389		}
390	};
391
392	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
393						&ds_create_info,
394						&device->meta_state.alloc,
395						&device->meta_state.btoi.img_ds_layout);
396	if (result != VK_SUCCESS)
397		goto fail;
398
399
400	VkPipelineLayoutCreateInfo pl_create_info = {
401		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
402		.setLayoutCount = 1,
403		.pSetLayouts = &device->meta_state.btoi.img_ds_layout,
404		.pushConstantRangeCount = 1,
405		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
406	};
407
408	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
409					  &pl_create_info,
410					  &device->meta_state.alloc,
411					  &device->meta_state.btoi.img_p_layout);
412	if (result != VK_SUCCESS)
413		goto fail;
414
415	/* compute shader */
416
417	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
418		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
419		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
420		.module = radv_shader_module_to_handle(&cs),
421		.pName = "main",
422		.pSpecializationInfo = NULL,
423	};
424
425	VkComputePipelineCreateInfo vk_pipeline_info = {
426		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
427		.stage = pipeline_shader_stage,
428		.flags = 0,
429		.layout = device->meta_state.btoi.img_p_layout,
430	};
431
432	result = radv_CreateComputePipelines(radv_device_to_handle(device),
433					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
434					     1, &vk_pipeline_info, NULL,
435					     &device->meta_state.btoi.pipeline);
436	if (result != VK_SUCCESS)
437		goto fail;
438
439	if (device->physical_device->rad_info.chip_class >= GFX9) {
440		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
441			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
442			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
443			.module = radv_shader_module_to_handle(&cs_3d),
444			.pName = "main",
445			.pSpecializationInfo = NULL,
446		};
447
448		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
449			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
450			.stage = pipeline_shader_stage_3d,
451			.flags = 0,
452			.layout = device->meta_state.btoi.img_p_layout,
453		};
454
455		result = radv_CreateComputePipelines(radv_device_to_handle(device),
456						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
457						     1, &vk_pipeline_info_3d, NULL,
458						     &device->meta_state.btoi.pipeline_3d);
459		ralloc_free(cs_3d.nir);
460	}
461	ralloc_free(cs.nir);
462
463	return VK_SUCCESS;
464fail:
465	ralloc_free(cs_3d.nir);
466	ralloc_free(cs.nir);
467	return result;
468}
469
470static void
471radv_device_finish_meta_btoi_state(struct radv_device *device)
472{
473	struct radv_meta_state *state = &device->meta_state;
474
475	radv_DestroyPipelineLayout(radv_device_to_handle(device),
476				   state->btoi.img_p_layout, &state->alloc);
477	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
478				        state->btoi.img_ds_layout,
479					&state->alloc);
480	radv_DestroyPipeline(radv_device_to_handle(device),
481			     state->btoi.pipeline, &state->alloc);
482	radv_DestroyPipeline(radv_device_to_handle(device),
483			     state->btoi.pipeline_3d, &state->alloc);
484}
485
486/* Buffer to image - special path for R32G32B32 */
/*
 * Build the R32G32B32 buffer->image copy compute shader.
 *
 * R32G32B32 is handled as three separate 32-bit channel writes into a
 * storage texel buffer standing in for the image.  Each invocation
 * fetches one source texel at buffer index y * stride + x and writes
 * its three channels to destination dwords starting at
 * y * pitch + x * 3.
 *
 * Push constants: image offset at byte 0 (2 ints), destination pitch
 * (in dwords) at byte 8, source buffer row stride at byte 12.
 */
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
	/* 16x16x1 workgroup: one invocation per texel. */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Descriptor set 0: binding 0 = source buffer, binding 1 = dest buffer. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = workgroup_id * workgroup_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Destination image offset: push constants at byte 0. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	/* Destination row pitch: push constant at byte 8. */
	nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(pitch, 0);
	nir_intrinsic_set_range(pitch, 16);
	pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
	pitch->num_components = 1;
	nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
	nir_builder_instr_insert(&b, &pitch->instr);

	/* Source buffer row stride: push constant at byte 12. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	/* Linearize source buffer index: y * stride + x. */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);

	/* Destination dword index of the texel's first channel:
	 * y * pitch + x * 3 (three dwords per R32G32B32 texel). */
	nir_ssa_def *global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, img_coord, 1), &pitch->dest.ssa),
			 nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));

	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	/* Unfiltered fetch (txf) of the source texel. */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 1;
	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	nir_ssa_def *outval = &tex->dest.ssa;

	/* Emit one single-component store per channel. */
	for (int chan = 0; chan < 3; chan++) {
		nir_ssa_def *local_pos =
                       nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

               nir_ssa_def *coord =
                       nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}
599
600static VkResult
601radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
602{
603	VkResult result;
604	struct radv_shader_module cs = { .nir = NULL };
605
606	cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);
607
608	VkDescriptorSetLayoutCreateInfo ds_create_info = {
609		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
610		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
611		.bindingCount = 2,
612		.pBindings = (VkDescriptorSetLayoutBinding[]) {
613			{
614				.binding = 0,
615				.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
616				.descriptorCount = 1,
617				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
618				.pImmutableSamplers = NULL
619			},
620			{
621				.binding = 1,
622				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
623				.descriptorCount = 1,
624				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
625				.pImmutableSamplers = NULL
626			},
627		}
628	};
629
630	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
631						&ds_create_info,
632						&device->meta_state.alloc,
633						&device->meta_state.btoi_r32g32b32.img_ds_layout);
634	if (result != VK_SUCCESS)
635		goto fail;
636
637
638	VkPipelineLayoutCreateInfo pl_create_info = {
639		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
640		.setLayoutCount = 1,
641		.pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
642		.pushConstantRangeCount = 1,
643		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
644	};
645
646	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
647					  &pl_create_info,
648					  &device->meta_state.alloc,
649					  &device->meta_state.btoi_r32g32b32.img_p_layout);
650	if (result != VK_SUCCESS)
651		goto fail;
652
653	/* compute shader */
654
655	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
656		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
657		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
658		.module = radv_shader_module_to_handle(&cs),
659		.pName = "main",
660		.pSpecializationInfo = NULL,
661	};
662
663	VkComputePipelineCreateInfo vk_pipeline_info = {
664		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
665		.stage = pipeline_shader_stage,
666		.flags = 0,
667		.layout = device->meta_state.btoi_r32g32b32.img_p_layout,
668	};
669
670	result = radv_CreateComputePipelines(radv_device_to_handle(device),
671					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
672					     1, &vk_pipeline_info, NULL,
673					     &device->meta_state.btoi_r32g32b32.pipeline);
674
675fail:
676	ralloc_free(cs.nir);
677	return result;
678}
679
680static void
681radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
682{
683	struct radv_meta_state *state = &device->meta_state;
684
685	radv_DestroyPipelineLayout(radv_device_to_handle(device),
686				   state->btoi_r32g32b32.img_p_layout, &state->alloc);
687	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
688				        state->btoi_r32g32b32.img_ds_layout,
689					&state->alloc);
690	radv_DestroyPipeline(radv_device_to_handle(device),
691			     state->btoi_r32g32b32.pipeline, &state->alloc);
692}
693
/*
 * Build the image->image copy compute shader.
 *
 * Each invocation fetches one texel from the source image (2D, or 3D
 * when is_3d) at global_id + src_offset and stores it into the
 * destination storage image at global_id + dst_offset.
 *
 * Push constants (24 bytes): src_offset at byte 0, dst_offset at
 * byte 12 (each 2 or 3 ints).
 */
static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	/* 3D image dim only for the GFX9 3D path (see top of file). */
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *buf_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
	/* 16x16x1 workgroup: one invocation per texel. */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Descriptor set 0: binding 0 = source image, binding 1 = dest image. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = workgroup_id * workgroup_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Source texel offset: push constants at byte 0. */
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 24);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	/* Destination texel offset: push constants at byte 12. */
	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 24);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	dst_offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);

	/* Unfiltered fetch (txf) of the source texel at LOD 0. */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = dim;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = is_3d ? 3 : 2;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Store to the destination image; src[2] (sample index) unused. */
	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(dst_coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(outval);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
779
780/* image to image - don't write use image accessors */
781static VkResult
782radv_device_init_meta_itoi_state(struct radv_device *device)
783{
784	VkResult result;
785	struct radv_shader_module cs = { .nir = NULL };
786	struct radv_shader_module cs_3d = { .nir = NULL };
787	cs.nir = build_nir_itoi_compute_shader(device, false);
788	if (device->physical_device->rad_info.chip_class >= GFX9)
789		cs_3d.nir = build_nir_itoi_compute_shader(device, true);
790	/*
791	 * two descriptors one for the image being sampled
792	 * one for the buffer being written.
793	 */
794	VkDescriptorSetLayoutCreateInfo ds_create_info = {
795		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
796		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
797		.bindingCount = 2,
798		.pBindings = (VkDescriptorSetLayoutBinding[]) {
799			{
800				.binding = 0,
801				.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
802				.descriptorCount = 1,
803				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
804				.pImmutableSamplers = NULL
805			},
806			{
807				.binding = 1,
808				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
809				.descriptorCount = 1,
810				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
811				.pImmutableSamplers = NULL
812			},
813		}
814	};
815
816	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
817						&ds_create_info,
818						&device->meta_state.alloc,
819						&device->meta_state.itoi.img_ds_layout);
820	if (result != VK_SUCCESS)
821		goto fail;
822
823
824	VkPipelineLayoutCreateInfo pl_create_info = {
825		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
826		.setLayoutCount = 1,
827		.pSetLayouts = &device->meta_state.itoi.img_ds_layout,
828		.pushConstantRangeCount = 1,
829		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
830	};
831
832	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
833					  &pl_create_info,
834					  &device->meta_state.alloc,
835					  &device->meta_state.itoi.img_p_layout);
836	if (result != VK_SUCCESS)
837		goto fail;
838
839	/* compute shader */
840
841	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
842		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
843		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
844		.module = radv_shader_module_to_handle(&cs),
845		.pName = "main",
846		.pSpecializationInfo = NULL,
847	};
848
849	VkComputePipelineCreateInfo vk_pipeline_info = {
850		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
851		.stage = pipeline_shader_stage,
852		.flags = 0,
853		.layout = device->meta_state.itoi.img_p_layout,
854	};
855
856	result = radv_CreateComputePipelines(radv_device_to_handle(device),
857					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
858					     1, &vk_pipeline_info, NULL,
859					     &device->meta_state.itoi.pipeline);
860	if (result != VK_SUCCESS)
861		goto fail;
862
863	if (device->physical_device->rad_info.chip_class >= GFX9) {
864		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
865			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
866.stage = VK_SHADER_STAGE_COMPUTE_BIT,
867			.module = radv_shader_module_to_handle(&cs_3d),
868			.pName = "main",
869			.pSpecializationInfo = NULL,
870		};
871
872		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
873			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
874			.stage = pipeline_shader_stage_3d,
875			.flags = 0,
876			.layout = device->meta_state.itoi.img_p_layout,
877		};
878
879		result = radv_CreateComputePipelines(radv_device_to_handle(device),
880						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
881						     1, &vk_pipeline_info_3d, NULL,
882						     &device->meta_state.itoi.pipeline_3d);
883
884		ralloc_free(cs_3d.nir);
885	}
886	ralloc_free(cs.nir);
887
888	return VK_SUCCESS;
889fail:
890	ralloc_free(cs.nir);
891	ralloc_free(cs_3d.nir);
892	return result;
893}
894
895static void
896radv_device_finish_meta_itoi_state(struct radv_device *device)
897{
898	struct radv_meta_state *state = &device->meta_state;
899
900	radv_DestroyPipelineLayout(radv_device_to_handle(device),
901				   state->itoi.img_p_layout, &state->alloc);
902	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
903				        state->itoi.img_ds_layout,
904					&state->alloc);
905	radv_DestroyPipeline(radv_device_to_handle(device),
906			     state->itoi.pipeline, &state->alloc);
907	if (device->physical_device->rad_info.chip_class >= GFX9)
908		radv_DestroyPipeline(radv_device_to_handle(device),
909				     state->itoi.pipeline_3d, &state->alloc);
910}
911
/* Builds the compute shader used to copy between two R32G32B32 images.
 * Both images are bound as single-channel R32 texel buffers, so each 16x16
 * invocation copies the three 32-bit channels of one pixel, one buffer
 * element at a time. */
static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	/* Same R32 texel-buffer type for source and destination views. */
	const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							 false,
							 false,
							 GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_r32g32b32_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Source texel buffer: set 0, binding 0. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      type, "input_img");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	/* Destination texel buffer: set 0, binding 1. */
	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						      type, "output_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = wg_id * local_size + invocation id: the 2D pixel this
	 * invocation copies. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants, bytes 0-11: src offset x, y and src stride. */
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 24);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 3;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 3, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_ssa_def *src_stride = nir_channel(&b, &src_offset->dest.ssa, 2);

	/* Push constants, bytes 12-23: dst offset x, y and dst stride. */
	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 24);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	dst_offset->num_components = 3;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 3, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *dst_stride = nir_channel(&b, &dst_offset->dest.ssa, 2);

	/* Only components 0/1 (the x/y offsets) are used as coordinates below;
	 * component 2 carries the stride. */
	nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
	nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);

	/* Linearize: pos = y * stride + x * 3 (three dwords per pixel). */
	nir_ssa_def *src_global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
			 nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));

	nir_ssa_def *dst_global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
			 nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));

	/* Copy the three channels, one texel-buffer element per iteration. */
	for (int chan = 0; chan < 3; chan++) {
		/* src */
		nir_ssa_def *src_local_pos =
			nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *src_coord =
			nir_vec4(&b, src_local_pos, src_local_pos,
				 src_local_pos, src_local_pos);

		nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

		/* Texel fetch (txf) of one element from the source buffer. */
		nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
		tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
		tex->op = nir_texop_txf;
		tex->src[0].src_type = nir_tex_src_coord;
		tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
		tex->src[1].src_type = nir_tex_src_lod;
		tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
		tex->src[2].src_type = nir_tex_src_texture_deref;
		tex->src[2].src = nir_src_for_ssa(input_img_deref);
		tex->dest_type = nir_type_float;
		tex->is_array = false;
		tex->coord_components = 1;
		nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
		nir_builder_instr_insert(&b, &tex->instr);

		nir_ssa_def *outval = &tex->dest.ssa;

		/* dst */
		nir_ssa_def *dst_local_pos =
			nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *dst_coord =
			nir_vec4(&b, dst_local_pos, dst_local_pos,
				 dst_local_pos, dst_local_pos);

		/* Store channel 0 of the fetch result into the destination. */
		nir_intrinsic_instr *store =
			nir_intrinsic_instr_create(b.shader,
						   nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(dst_coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, 0));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}
1026
1027/* Image to image - special path for R32G32B32 */
1028static VkResult
1029radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
1030{
1031	VkResult result;
1032	struct radv_shader_module cs = { .nir = NULL };
1033
1034	cs.nir = build_nir_itoi_r32g32b32_compute_shader(device);
1035
1036	VkDescriptorSetLayoutCreateInfo ds_create_info = {
1037		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1038		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1039		.bindingCount = 2,
1040		.pBindings = (VkDescriptorSetLayoutBinding[]) {
1041			{
1042				.binding = 0,
1043				.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1044				.descriptorCount = 1,
1045				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1046				.pImmutableSamplers = NULL
1047			},
1048			{
1049				.binding = 1,
1050				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1051				.descriptorCount = 1,
1052				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1053				.pImmutableSamplers = NULL
1054			},
1055		}
1056	};
1057
1058	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1059						&ds_create_info,
1060						&device->meta_state.alloc,
1061						&device->meta_state.itoi_r32g32b32.img_ds_layout);
1062	if (result != VK_SUCCESS)
1063		goto fail;
1064
1065
1066	VkPipelineLayoutCreateInfo pl_create_info = {
1067		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1068		.setLayoutCount = 1,
1069		.pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
1070		.pushConstantRangeCount = 1,
1071		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
1072	};
1073
1074	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1075					  &pl_create_info,
1076					  &device->meta_state.alloc,
1077					  &device->meta_state.itoi_r32g32b32.img_p_layout);
1078	if (result != VK_SUCCESS)
1079		goto fail;
1080
1081	/* compute shader */
1082
1083	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1084		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1085		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
1086		.module = radv_shader_module_to_handle(&cs),
1087		.pName = "main",
1088		.pSpecializationInfo = NULL,
1089	};
1090
1091	VkComputePipelineCreateInfo vk_pipeline_info = {
1092		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1093		.stage = pipeline_shader_stage,
1094		.flags = 0,
1095		.layout = device->meta_state.itoi_r32g32b32.img_p_layout,
1096	};
1097
1098	result = radv_CreateComputePipelines(radv_device_to_handle(device),
1099					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
1100					     1, &vk_pipeline_info, NULL,
1101					     &device->meta_state.itoi_r32g32b32.pipeline);
1102
1103fail:
1104	ralloc_free(cs.nir);
1105	return result;
1106}
1107
1108static void
1109radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
1110{
1111	struct radv_meta_state *state = &device->meta_state;
1112
1113	radv_DestroyPipelineLayout(radv_device_to_handle(device),
1114				   state->itoi_r32g32b32.img_p_layout, &state->alloc);
1115	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1116				        state->itoi_r32g32b32.img_ds_layout,
1117					&state->alloc);
1118	radv_DestroyPipeline(radv_device_to_handle(device),
1119			     state->itoi_r32g32b32.pipeline, &state->alloc);
1120}
1121
/* Builds a compute shader that stores a push-constant clear value to every
 * pixel of a storage image.  is_3d selects a 3D image type (used by the
 * GFX9+ callers, which must address 3D resources as 3D). */
static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
	/* Each workgroup clears a 16x16 pixel tile. */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	/* Destination storage image: set 0, binding 0. */
	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 0;

	/* global_id = wg_id * local_size + invocation id: the pixel to clear. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants, bytes 0-15: the 4-component clear value. */
	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(clear_val, 0);
	nir_intrinsic_set_range(clear_val, 20);
	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	clear_val->num_components = 4;
	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
	nir_builder_instr_insert(&b, &clear_val->instr);

	/* Push constants, bytes 16-19: base layer/slice to clear. */
	nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(layer, 0);
	nir_intrinsic_set_range(layer, 20);
	layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
	layer->num_components = 1;
	nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
	nir_builder_instr_insert(&b, &layer->instr);

	/* z = dispatch z + base layer, so a single dispatch can clear a
	 * range of slices starting at 'layer'. */
	nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);

	/* Rebuild the store coordinate as (x, y, z+layer, 0). */
	nir_ssa_def *comps[4];
	comps[0] = nir_channel(&b, global_id, 0);
	comps[1] = nir_channel(&b, global_id, 1);
	comps[2] = global_z;
	comps[3] = nir_imm_int(&b, 0);
	global_id = nir_vec(&b, comps, 4);

	/* Store the clear value to the target pixel. */
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(global_id);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(&clear_val->dest.ssa);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
1186
/* Create the descriptor set layout, pipeline layout and compute pipelines
 * (2D, plus a 3D variant on GFX9+) used for compute image clears. */
static VkResult
radv_device_init_meta_cleari_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };
	struct radv_shader_module cs_3d = { .nir = NULL };
	cs.nir = build_nir_cleari_compute_shader(device, false);
	if (device->physical_device->rad_info.chip_class >= GFX9)
		cs_3d.nir = build_nir_cleari_compute_shader(device, true);

	/*
	 * A single descriptor for the storage image being cleared.
	 */
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.cleari.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	/* 20 bytes of push constants: clear value (16) + base layer (4),
	 * matching the shader's load_push_constant range. */
	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.cleari.img_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &pl_create_info,
					  &device->meta_state.alloc,
					  &device->meta_state.cleari.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */

	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.cleari.img_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     &device->meta_state.cleari.pipeline);
	if (result != VK_SUCCESS)
		goto fail;


	/* GFX9+ also needs a variant that stores through a 3D image type. */
	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* compute shader */
		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
			.module = radv_shader_module_to_handle(&cs_3d),
			.pName = "main",
			.pSpecializationInfo = NULL,
		};

		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
			.stage = pipeline_shader_stage_3d,
			.flags = 0,
			.layout = device->meta_state.cleari.img_p_layout,
		};

		result = radv_CreateComputePipelines(radv_device_to_handle(device),
						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
						     1, &vk_pipeline_info_3d, NULL,
						     &device->meta_state.cleari.pipeline_3d);
		if (result != VK_SUCCESS)
			goto fail;

		ralloc_free(cs_3d.nir);
	}
	ralloc_free(cs.nir);
	return VK_SUCCESS;
fail:
	/* ralloc_free(NULL) is a no-op, so the unused 3D module is safe here. */
	ralloc_free(cs.nir);
	ralloc_free(cs_3d.nir);
	return result;
}
1297
1298static void
1299radv_device_finish_meta_cleari_state(struct radv_device *device)
1300{
1301	struct radv_meta_state *state = &device->meta_state;
1302
1303	radv_DestroyPipelineLayout(radv_device_to_handle(device),
1304				   state->cleari.img_p_layout, &state->alloc);
1305	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1306				        state->cleari.img_ds_layout,
1307					&state->alloc);
1308	radv_DestroyPipeline(radv_device_to_handle(device),
1309			     state->cleari.pipeline, &state->alloc);
1310	radv_DestroyPipeline(radv_device_to_handle(device),
1311			     state->cleari.pipeline_3d, &state->alloc);
1312}
1313
1314/* Special path for clearing R32G32B32 images using a compute shader. */
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	/* The image is accessed as an R32 texel buffer, one channel at a time. */
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_r32g32b32_cs");
	/* Each workgroup clears a 16x16 pixel tile. */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	/* Destination storage texel buffer: set 0, binding 0. */
	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 0;

	/* global_id = wg_id * local_size + invocation id: the pixel to clear. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants, bytes 0-11: the three 32-bit clear channels. */
	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(clear_val, 0);
	nir_intrinsic_set_range(clear_val, 16);
	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	clear_val->num_components = 3;
	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 3, 32, "clear_value");
	nir_builder_instr_insert(&b, &clear_val->instr);

	/* Push constants, bytes 12-15: the image row stride in dwords. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *global_y = nir_channel(&b, global_id, 1);

	/* Linearize: pos = y * stride + x * 3 (three dwords per pixel). */
	nir_ssa_def *global_pos =
		nir_iadd(&b,
			 nir_imul(&b, global_y, &stride->dest.ssa),
			 nir_imul(&b, global_x, nir_imm_int(&b, 3)));

	/* Store each of the three clear channels to its own buffer element. */
	for (unsigned chan = 0; chan < 3; chan++) {
		nir_ssa_def *local_pos =
			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *coord =
			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, &clear_val->dest.ssa, chan));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}
1385
1386static VkResult
1387radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
1388{
1389	VkResult result;
1390	struct radv_shader_module cs = { .nir = NULL };
1391
1392	cs.nir = build_nir_cleari_r32g32b32_compute_shader(device);
1393
1394	VkDescriptorSetLayoutCreateInfo ds_create_info = {
1395		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1396		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1397		.bindingCount = 1,
1398		.pBindings = (VkDescriptorSetLayoutBinding[]) {
1399			{
1400				.binding = 0,
1401				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1402				.descriptorCount = 1,
1403				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1404				.pImmutableSamplers = NULL
1405			},
1406		}
1407	};
1408
1409	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1410						&ds_create_info,
1411						&device->meta_state.alloc,
1412						&device->meta_state.cleari_r32g32b32.img_ds_layout);
1413	if (result != VK_SUCCESS)
1414		goto fail;
1415
1416	VkPipelineLayoutCreateInfo pl_create_info = {
1417		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1418		.setLayoutCount = 1,
1419		.pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
1420		.pushConstantRangeCount = 1,
1421		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
1422	};
1423
1424	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1425					   &pl_create_info,
1426					   &device->meta_state.alloc,
1427					   &device->meta_state.cleari_r32g32b32.img_p_layout);
1428	if (result != VK_SUCCESS)
1429		goto fail;
1430
1431	/* compute shader */
1432	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1433		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1434		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
1435		.module = radv_shader_module_to_handle(&cs),
1436		.pName = "main",
1437		.pSpecializationInfo = NULL,
1438	};
1439
1440	VkComputePipelineCreateInfo vk_pipeline_info = {
1441		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1442		.stage = pipeline_shader_stage,
1443		.flags = 0,
1444		.layout = device->meta_state.cleari_r32g32b32.img_p_layout,
1445	};
1446
1447	result = radv_CreateComputePipelines(radv_device_to_handle(device),
1448					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
1449					     1, &vk_pipeline_info, NULL,
1450					     &device->meta_state.cleari_r32g32b32.pipeline);
1451
1452fail:
1453	ralloc_free(cs.nir);
1454	return result;
1455}
1456
1457static void
1458radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
1459{
1460	struct radv_meta_state *state = &device->meta_state;
1461
1462	radv_DestroyPipelineLayout(radv_device_to_handle(device),
1463				   state->cleari_r32g32b32.img_p_layout,
1464				   &state->alloc);
1465	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1466				        state->cleari_r32g32b32.img_ds_layout,
1467					&state->alloc);
1468	radv_DestroyPipeline(radv_device_to_handle(device),
1469			     state->cleari_r32g32b32.pipeline, &state->alloc);
1470}
1471
1472void
1473radv_device_finish_meta_bufimage_state(struct radv_device *device)
1474{
1475	radv_device_finish_meta_itob_state(device);
1476	radv_device_finish_meta_btoi_state(device);
1477	radv_device_finish_meta_btoi_r32g32b32_state(device);
1478	radv_device_finish_meta_itoi_state(device);
1479	radv_device_finish_meta_itoi_r32g32b32_state(device);
1480	radv_device_finish_meta_cleari_state(device);
1481	radv_device_finish_meta_cleari_r32g32b32_state(device);
1482}
1483
/* Create all buffer<->image meta states.  On failure, the label chain
 * below unwinds in reverse: each label finishes its own (possibly
 * partially-initialized) state, then falls through to finish everything
 * created before it. */
VkResult
radv_device_init_meta_bufimage_state(struct radv_device *device)
{
	VkResult result;

	result = radv_device_init_meta_itob_state(device);
	if (result != VK_SUCCESS)
		goto fail_itob;

	result = radv_device_init_meta_btoi_state(device);
	if (result != VK_SUCCESS)
		goto fail_btoi;

	result = radv_device_init_meta_btoi_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_btoi_r32g32b32;

	result = radv_device_init_meta_itoi_state(device);
	if (result != VK_SUCCESS)
		goto fail_itoi;

	result = radv_device_init_meta_itoi_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_itoi_r32g32b32;

	result = radv_device_init_meta_cleari_state(device);
	if (result != VK_SUCCESS)
		goto fail_cleari;

	result = radv_device_init_meta_cleari_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_cleari_r32g32b32;

	return VK_SUCCESS;
	/* Deliberate fall-through between labels — do not reorder. */
fail_cleari_r32g32b32:
	radv_device_finish_meta_cleari_r32g32b32_state(device);
fail_cleari:
	radv_device_finish_meta_cleari_state(device);
fail_itoi_r32g32b32:
	radv_device_finish_meta_itoi_r32g32b32_state(device);
fail_itoi:
	radv_device_finish_meta_itoi_state(device);
fail_btoi_r32g32b32:
	radv_device_finish_meta_btoi_r32g32b32_state(device);
fail_btoi:
	radv_device_finish_meta_btoi_state(device);
fail_itob:
	radv_device_finish_meta_itob_state(device);
	return result;
}
1534
1535static void
1536create_iview(struct radv_cmd_buffer *cmd_buffer,
1537             struct radv_meta_blit2d_surf *surf,
1538             struct radv_image_view *iview)
1539{
1540	VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
1541		radv_meta_get_view_type(surf->image);
1542	radv_image_view_init(iview, cmd_buffer->device,
1543			     &(VkImageViewCreateInfo) {
1544				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1545					     .image = radv_image_to_handle(surf->image),
1546					     .viewType = view_type,
1547					     .format = surf->format,
1548					     .subresourceRange = {
1549					     .aspectMask = surf->aspect_mask,
1550					     .baseMipLevel = surf->level,
1551					     .levelCount = 1,
1552					     .baseArrayLayer = surf->layer,
1553					     .layerCount = 1
1554				     },
1555			     });
1556}
1557
1558static void
1559create_bview(struct radv_cmd_buffer *cmd_buffer,
1560	     struct radv_buffer *buffer,
1561	     unsigned offset,
1562	     VkFormat format,
1563	     struct radv_buffer_view *bview)
1564{
1565	radv_buffer_view_init(bview, cmd_buffer->device,
1566			      &(VkBufferViewCreateInfo) {
1567				      .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1568				      .flags = 0,
1569				      .buffer = radv_buffer_to_handle(buffer),
1570				      .format = format,
1571				      .offset = offset,
1572				      .range = VK_WHOLE_SIZE,
1573			      });
1574
1575}
1576
/* Create a VkBuffer that aliases the image's backing memory (same BO, same
 * offset, full image size), so the image contents can be accessed through
 * texel-buffer views. */
static void
create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer,
			 struct radv_meta_blit2d_surf *surf,
			 VkBufferUsageFlagBits usage,
			 VkBuffer *buffer)
{
	struct radv_device *device = cmd_buffer->device;
	/* Wrap the image's BO in a stack-local memory object just for the
	 * bind call below. */
	struct radv_device_memory mem = { .bo = surf->image->bo };

	/* NOTE(review): the results of radv_CreateBuffer and
	 * radv_BindBufferMemory2 are ignored; on allocation failure *buffer
	 * may be invalid — confirm callers tolerate this. */
	radv_CreateBuffer(radv_device_to_handle(device),
			  &(VkBufferCreateInfo) {
				.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
				.flags = 0,
				.size = surf->image->size,
				.usage = usage,
				.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
			  }, NULL, buffer);

	radv_BindBufferMemory2(radv_device_to_handle(device), 1,
			       (VkBindBufferMemoryInfo[]) {
				    {
					.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
					.buffer = *buffer,
					.memory = radv_device_memory_to_handle(&mem),
					.memoryOffset = surf->image->offset,
				    }
			       });
}
1605
1606static void
1607create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1608			   struct radv_buffer *buffer,
1609			   unsigned offset,
1610			   VkFormat src_format,
1611			   struct radv_buffer_view *bview)
1612{
1613	VkFormat format;
1614
1615	switch (src_format) {
1616	case VK_FORMAT_R32G32B32_UINT:
1617		format = VK_FORMAT_R32_UINT;
1618		break;
1619	case VK_FORMAT_R32G32B32_SINT:
1620		format = VK_FORMAT_R32_SINT;
1621		break;
1622	case VK_FORMAT_R32G32B32_SFLOAT:
1623		format = VK_FORMAT_R32_SFLOAT;
1624		break;
1625	default:
1626		unreachable("invalid R32G32B32 format");
1627	}
1628
1629	radv_buffer_view_init(bview, cmd_buffer->device,
1630			      &(VkBufferViewCreateInfo) {
1631				      .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1632				      .flags = 0,
1633				      .buffer = radv_buffer_to_handle(buffer),
1634				      .format = format,
1635				      .offset = offset,
1636				      .range = VK_WHOLE_SIZE,
1637			      });
1638}
1639
1640static unsigned
1641get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1642			       struct radv_meta_blit2d_surf *surf)
1643{
1644	unsigned stride;
1645
1646	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1647		stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
1648	} else {
1649		stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
1650	}
1651
1652	return stride;
1653}
1654
/* Push the two descriptors used by the image->buffer copy pipeline:
 * the sampled source image at binding 0 and the destination storage texel
 * buffer at binding 1. */
static void
itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
		      struct radv_image_view *src,
		      struct radv_buffer_view *dst)
{
	struct radv_device *device = cmd_buffer->device;

	radv_meta_push_descriptor_set(cmd_buffer,
				      VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.itob.img_p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
				              {
				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
				                      .dstBinding = 0,
				                      .dstArrayElement = 0,
				                      .descriptorCount = 1,
				                      .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
				                      .pImageInfo = (VkDescriptorImageInfo[]) {
				                              {
				                                      /* Fetched with txf, so no sampler is needed. */
				                                      .sampler = VK_NULL_HANDLE,
				                                      .imageView = radv_image_view_to_handle(src),
				                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
				                              },
				                      }
				              },
				              {
				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
				                      .dstBinding = 1,
				                      .dstArrayElement = 0,
				                      .descriptorCount = 1,
				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(dst) },
				              }
				      });
}
1692
1693void
1694radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
1695			  struct radv_meta_blit2d_surf *src,
1696			  struct radv_meta_blit2d_buffer *dst,
1697			  unsigned num_rects,
1698			  struct radv_meta_blit2d_rect *rects)
1699{
1700	VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
1701	struct radv_device *device = cmd_buffer->device;
1702	struct radv_image_view src_view;
1703	struct radv_buffer_view dst_view;
1704
1705	create_iview(cmd_buffer, src, &src_view);
1706	create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
1707	itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1708
1709	if (device->physical_device->rad_info.chip_class >= GFX9 &&
1710	    src->image->type == VK_IMAGE_TYPE_3D)
1711		pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
1712
1713	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1714			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1715
1716	for (unsigned r = 0; r < num_rects; ++r) {
1717		unsigned push_constants[4] = {
1718			rects[r].src_x,
1719			rects[r].src_y,
1720			src->layer,
1721			dst->pitch
1722		};
1723		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1724				      device->meta_state.itob.img_p_layout,
1725				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1726				      push_constants);
1727
1728		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1729	}
1730}
1731
1732static void
1733btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1734				struct radv_buffer_view *src,
1735				struct radv_buffer_view *dst)
1736{
1737	struct radv_device *device = cmd_buffer->device;
1738
1739	radv_meta_push_descriptor_set(cmd_buffer,
1740				      VK_PIPELINE_BIND_POINT_COMPUTE,
1741				      device->meta_state.btoi_r32g32b32.img_p_layout,
1742				      0, /* set */
1743				      2, /* descriptorWriteCount */
1744				      (VkWriteDescriptorSet[]) {
1745				              {
1746				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1747				                      .dstBinding = 0,
1748				                      .dstArrayElement = 0,
1749				                      .descriptorCount = 1,
1750				                      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1751				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(src) },
1752				              },
1753				              {
1754				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1755				                      .dstBinding = 1,
1756				                      .dstArrayElement = 0,
1757				                      .descriptorCount = 1,
1758				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1759				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(dst) },
1760				              }
1761				      });
1762}
1763
1764static void
1765radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1766				       struct radv_meta_blit2d_buffer *src,
1767				       struct radv_meta_blit2d_surf *dst,
1768				       unsigned num_rects,
1769				       struct radv_meta_blit2d_rect *rects)
1770{
1771	VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
1772	struct radv_device *device = cmd_buffer->device;
1773	struct radv_buffer_view src_view, dst_view;
1774	unsigned dst_offset = 0;
1775	unsigned stride;
1776	VkBuffer buffer;
1777
1778	/* This special btoi path for R32G32B32 formats will write the linear
1779	 * image as a buffer with the same underlying memory. The compute
1780	 * shader will copy all components separately using a R32 format.
1781	 */
1782	create_buffer_from_image(cmd_buffer, dst,
1783				 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
1784				 &buffer);
1785
1786	create_bview(cmd_buffer, src->buffer, src->offset,
1787		     src->format, &src_view);
1788	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
1789				   dst_offset, dst->format, &dst_view);
1790	btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1791
1792	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1793			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1794
1795	stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1796
1797	for (unsigned r = 0; r < num_rects; ++r) {
1798		unsigned push_constants[4] = {
1799			rects[r].dst_x,
1800			rects[r].dst_y,
1801			stride,
1802			src->pitch,
1803		};
1804
1805		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1806				      device->meta_state.btoi_r32g32b32.img_p_layout,
1807				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1808				      push_constants);
1809
1810		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1811	}
1812
1813	radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1814}
1815
1816static void
1817btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1818		      struct radv_buffer_view *src,
1819		      struct radv_image_view *dst)
1820{
1821	struct radv_device *device = cmd_buffer->device;
1822
1823	radv_meta_push_descriptor_set(cmd_buffer,
1824				      VK_PIPELINE_BIND_POINT_COMPUTE,
1825				      device->meta_state.btoi.img_p_layout,
1826				      0, /* set */
1827				      2, /* descriptorWriteCount */
1828				      (VkWriteDescriptorSet[]) {
1829				              {
1830				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1831				                      .dstBinding = 0,
1832				                      .dstArrayElement = 0,
1833				                      .descriptorCount = 1,
1834				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1835				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(src) },
1836				              },
1837				              {
1838				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1839				                      .dstBinding = 1,
1840				                      .dstArrayElement = 0,
1841				                      .descriptorCount = 1,
1842				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1843				                      .pImageInfo = (VkDescriptorImageInfo[]) {
1844				                              {
1845				                                      .sampler = VK_NULL_HANDLE,
1846				                                      .imageView = radv_image_view_to_handle(dst),
1847				                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1848				                              },
1849				                      }
1850				              }
1851				      });
1852}
1853
1854void
1855radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
1856			     struct radv_meta_blit2d_buffer *src,
1857			     struct radv_meta_blit2d_surf *dst,
1858			     unsigned num_rects,
1859			     struct radv_meta_blit2d_rect *rects)
1860{
1861	VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
1862	struct radv_device *device = cmd_buffer->device;
1863	struct radv_buffer_view src_view;
1864	struct radv_image_view dst_view;
1865
1866	if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
1867	    dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
1868	    dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
1869		radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
1870						       num_rects, rects);
1871		return;
1872	}
1873
1874	create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
1875	create_iview(cmd_buffer, dst, &dst_view);
1876	btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1877
1878	if (device->physical_device->rad_info.chip_class >= GFX9 &&
1879	    dst->image->type == VK_IMAGE_TYPE_3D)
1880		pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
1881	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1882			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1883
1884	for (unsigned r = 0; r < num_rects; ++r) {
1885		unsigned push_constants[4] = {
1886			rects[r].dst_x,
1887			rects[r].dst_y,
1888			dst->layer,
1889			src->pitch,
1890		};
1891		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1892				      device->meta_state.btoi.img_p_layout,
1893				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1894				      push_constants);
1895
1896		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1897	}
1898}
1899
1900static void
1901itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1902				struct radv_buffer_view *src,
1903				struct radv_buffer_view *dst)
1904{
1905	struct radv_device *device = cmd_buffer->device;
1906
1907	radv_meta_push_descriptor_set(cmd_buffer,
1908				      VK_PIPELINE_BIND_POINT_COMPUTE,
1909				      device->meta_state.itoi_r32g32b32.img_p_layout,
1910				      0, /* set */
1911				      2, /* descriptorWriteCount */
1912				      (VkWriteDescriptorSet[]) {
1913				              {
1914				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1915				                      .dstBinding = 0,
1916				                      .dstArrayElement = 0,
1917				                      .descriptorCount = 1,
1918				                      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1919				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(src) },
1920				              },
1921				              {
1922				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1923				                      .dstBinding = 1,
1924				                      .dstArrayElement = 0,
1925				                      .descriptorCount = 1,
1926				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1927				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(dst) },
1928				              }
1929				      });
1930}
1931
1932static void
1933radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1934				      struct radv_meta_blit2d_surf *src,
1935				      struct radv_meta_blit2d_surf *dst,
1936				      unsigned num_rects,
1937				      struct radv_meta_blit2d_rect *rects)
1938{
1939	VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
1940	struct radv_device *device = cmd_buffer->device;
1941	struct radv_buffer_view src_view, dst_view;
1942	unsigned src_offset = 0, dst_offset = 0;
1943	unsigned src_stride, dst_stride;
1944	VkBuffer src_buffer, dst_buffer;
1945
1946	/* 96-bit formats are only compatible to themselves. */
1947	assert(dst->format == VK_FORMAT_R32G32B32_UINT ||
1948	       dst->format == VK_FORMAT_R32G32B32_SINT ||
1949	       dst->format == VK_FORMAT_R32G32B32_SFLOAT);
1950
1951	/* This special itoi path for R32G32B32 formats will write the linear
1952	 * image as a buffer with the same underlying memory. The compute
1953	 * shader will copy all components separately using a R32 format.
1954	 */
1955	create_buffer_from_image(cmd_buffer, src,
1956				 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
1957				 &src_buffer);
1958	create_buffer_from_image(cmd_buffer, dst,
1959				 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
1960				 &dst_buffer);
1961
1962	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer),
1963				   src_offset, src->format, &src_view);
1964	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer),
1965				   dst_offset, dst->format, &dst_view);
1966	itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1967
1968	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1969			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1970
1971	src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
1972	dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1973
1974	for (unsigned r = 0; r < num_rects; ++r) {
1975		unsigned push_constants[6] = {
1976			rects[r].src_x,
1977			rects[r].src_y,
1978			src_stride,
1979			rects[r].dst_x,
1980			rects[r].dst_y,
1981			dst_stride,
1982		};
1983		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1984				      device->meta_state.itoi_r32g32b32.img_p_layout,
1985				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
1986				      push_constants);
1987
1988		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1989	}
1990
1991	radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
1992	radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
1993}
1994
1995static void
1996itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1997		      struct radv_image_view *src,
1998		      struct radv_image_view *dst)
1999{
2000	struct radv_device *device = cmd_buffer->device;
2001
2002	radv_meta_push_descriptor_set(cmd_buffer,
2003				      VK_PIPELINE_BIND_POINT_COMPUTE,
2004				      device->meta_state.itoi.img_p_layout,
2005				      0, /* set */
2006				      2, /* descriptorWriteCount */
2007				      (VkWriteDescriptorSet[]) {
2008				              {
2009				                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2010				                       .dstBinding = 0,
2011				                       .dstArrayElement = 0,
2012				                       .descriptorCount = 1,
2013				                       .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
2014				                       .pImageInfo = (VkDescriptorImageInfo[]) {
2015				                               {
2016				                                       .sampler = VK_NULL_HANDLE,
2017				                                       .imageView = radv_image_view_to_handle(src),
2018				                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2019				                               },
2020				                       }
2021				              },
2022				              {
2023				                       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2024				                       .dstBinding = 1,
2025				                       .dstArrayElement = 0,
2026				                       .descriptorCount = 1,
2027				                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2028				                       .pImageInfo = (VkDescriptorImageInfo[]) {
2029				                               {
2030				                                       .sampler = VK_NULL_HANDLE,
2031				                                       .imageView = radv_image_view_to_handle(dst),
2032				                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2033				                               },
2034				                       }
2035				              }
2036				      });
2037}
2038
2039void
2040radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
2041			    struct radv_meta_blit2d_surf *src,
2042			    struct radv_meta_blit2d_surf *dst,
2043			    unsigned num_rects,
2044			    struct radv_meta_blit2d_rect *rects)
2045{
2046	VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline;
2047	struct radv_device *device = cmd_buffer->device;
2048	struct radv_image_view src_view, dst_view;
2049
2050	if (src->format == VK_FORMAT_R32G32B32_UINT ||
2051	    src->format == VK_FORMAT_R32G32B32_SINT ||
2052	    src->format == VK_FORMAT_R32G32B32_SFLOAT) {
2053		radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst,
2054						      num_rects, rects);
2055		return;
2056	}
2057
2058	create_iview(cmd_buffer, src, &src_view);
2059	create_iview(cmd_buffer, dst, &dst_view);
2060
2061	itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
2062
2063	if (device->physical_device->rad_info.chip_class >= GFX9 &&
2064	    (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
2065		pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
2066	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2067			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2068
2069	for (unsigned r = 0; r < num_rects; ++r) {
2070		unsigned push_constants[6] = {
2071			rects[r].src_x,
2072			rects[r].src_y,
2073			src->layer,
2074			rects[r].dst_x,
2075			rects[r].dst_y,
2076			dst->layer,
2077		};
2078		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2079				      device->meta_state.itoi.img_p_layout,
2080				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
2081				      push_constants);
2082
2083		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
2084	}
2085}
2086
2087static void
2088cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2089				  struct radv_buffer_view *view)
2090{
2091	struct radv_device *device = cmd_buffer->device;
2092
2093	radv_meta_push_descriptor_set(cmd_buffer,
2094				      VK_PIPELINE_BIND_POINT_COMPUTE,
2095				      device->meta_state.cleari_r32g32b32.img_p_layout,
2096				      0, /* set */
2097				      1, /* descriptorWriteCount */
2098				      (VkWriteDescriptorSet[]) {
2099				              {
2100				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2101				                      .dstBinding = 0,
2102				                      .dstArrayElement = 0,
2103				                      .descriptorCount = 1,
2104				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
2105				                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(view) },
2106				              }
2107				      });
2108}
2109
2110static void
2111radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
2112				   struct radv_meta_blit2d_surf *dst,
2113				   const VkClearColorValue *clear_color)
2114{
2115	VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
2116	struct radv_device *device = cmd_buffer->device;
2117	struct radv_buffer_view dst_view;
2118	unsigned stride;
2119	VkBuffer buffer;
2120
2121	/* This special clear path for R32G32B32 formats will write the linear
2122	 * image as a buffer with the same underlying memory. The compute
2123	 * shader will clear all components separately using a R32 format.
2124	 */
2125	create_buffer_from_image(cmd_buffer, dst,
2126				 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
2127				 &buffer);
2128
2129	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
2130				   0, dst->format, &dst_view);
2131	cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
2132
2133	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2134			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2135
2136	stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
2137
2138	unsigned push_constants[4] = {
2139		clear_color->uint32[0],
2140		clear_color->uint32[1],
2141		clear_color->uint32[2],
2142		stride,
2143	};
2144
2145	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2146			      device->meta_state.cleari_r32g32b32.img_p_layout,
2147			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
2148			      push_constants);
2149
2150	radv_unaligned_dispatch(cmd_buffer, dst->image->info.width,
2151				dst->image->info.height, 1);
2152
2153	radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
2154}
2155
2156static void
2157cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2158	                struct radv_image_view *dst_iview)
2159{
2160	struct radv_device *device = cmd_buffer->device;
2161
2162	radv_meta_push_descriptor_set(cmd_buffer,
2163				      VK_PIPELINE_BIND_POINT_COMPUTE,
2164				      device->meta_state.cleari.img_p_layout,
2165				      0, /* set */
2166				      1, /* descriptorWriteCount */
2167				      (VkWriteDescriptorSet[]) {
2168				              {
2169				                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2170				                      .dstBinding = 0,
2171				                      .dstArrayElement = 0,
2172				                      .descriptorCount = 1,
2173				                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2174				                      .pImageInfo = (VkDescriptorImageInfo[]) {
2175				                               {
2176				                                      .sampler = VK_NULL_HANDLE,
2177				                                      .imageView = radv_image_view_to_handle(dst_iview),
2178				                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2179				                               },
2180				                      }
2181				               },
2182				      });
2183}
2184
2185void
2186radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
2187			 struct radv_meta_blit2d_surf *dst,
2188			 const VkClearColorValue *clear_color)
2189{
2190	VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline;
2191	struct radv_device *device = cmd_buffer->device;
2192	struct radv_image_view dst_iview;
2193
2194	if (dst->format == VK_FORMAT_R32G32B32_UINT ||
2195	    dst->format == VK_FORMAT_R32G32B32_SINT ||
2196	    dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
2197		radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
2198		return;
2199	}
2200
2201	create_iview(cmd_buffer, dst, &dst_iview);
2202	cleari_bind_descriptors(cmd_buffer, &dst_iview);
2203
2204	if (device->physical_device->rad_info.chip_class >= GFX9 &&
2205	    dst->image->type == VK_IMAGE_TYPE_3D)
2206		pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
2207
2208	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2209			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2210
2211	unsigned push_constants[5] = {
2212		clear_color->uint32[0],
2213		clear_color->uint32[1],
2214		clear_color->uint32[2],
2215		clear_color->uint32[3],
2216		dst->layer,
2217	};
2218
2219	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2220			      device->meta_state.cleari.img_p_layout,
2221			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
2222			      push_constants);
2223
2224	radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
2225}
2226