radv_meta_blit2d.c revision 01e04c3f
1/*
2 * Copyright © 2016 Red Hat
3 *
4 * based on anv driver:
5 * Copyright © 2016 Intel Corporation
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * IN THE SOFTWARE.
25 */
26
27#include "radv_meta.h"
28#include "nir/nir_builder.h"
29#include "vk_format.h"
30
/*
 * Kind of source a blit2d operation reads from.  The values double as
 * array indices into the per-source-type tables of the blit2d meta state
 * (pipeline layouts, pipelines, ...).
 */
enum blit2d_src_type {
	/* Source is an image sampled with 2D coordinates. */
	BLIT2D_SRC_TYPE_IMAGE    = 0,
	/* Source is a 3D image; the slice is supplied as a push constant. */
	BLIT2D_SRC_TYPE_IMAGE_3D = 1,
	/* Source is a buffer read through a texel buffer view. */
	BLIT2D_SRC_TYPE_BUFFER   = 2,
	/* Number of source types above; must stay last. */
	BLIT2D_NUM_SRC_TYPES     = 3,
};
37
38static VkResult
39blit2d_init_color_pipeline(struct radv_device *device,
40			   enum blit2d_src_type src_type,
41			   VkFormat format,
42			   uint32_t log2_samples);
43
44static VkResult
45blit2d_init_depth_only_pipeline(struct radv_device *device,
46				enum blit2d_src_type src_type,
47				uint32_t log2_samples);
48
49static VkResult
50blit2d_init_stencil_only_pipeline(struct radv_device *device,
51				  enum blit2d_src_type src_type,
52				  uint32_t log2_samples);
53
54static void
55create_iview(struct radv_cmd_buffer *cmd_buffer,
56             struct radv_meta_blit2d_surf *surf,
57             struct radv_image_view *iview, VkFormat depth_format,
58              VkImageAspectFlagBits aspects)
59{
60	VkFormat format;
61	VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
62		radv_meta_get_view_type(surf->image);
63
64	if (depth_format)
65		format = depth_format;
66	else
67		format = surf->format;
68
69	radv_image_view_init(iview, cmd_buffer->device,
70			     &(VkImageViewCreateInfo) {
71				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
72					     .image = radv_image_to_handle(surf->image),
73					     .viewType = view_type,
74					     .format = format,
75					     .subresourceRange = {
76					     .aspectMask = aspects,
77					     .baseMipLevel = surf->level,
78					     .levelCount = 1,
79					     .baseArrayLayer = surf->layer,
80					     .layerCount = 1
81				     },
82			     });
83}
84
85static void
86create_bview(struct radv_cmd_buffer *cmd_buffer,
87	     struct radv_meta_blit2d_buffer *src,
88	     struct radv_buffer_view *bview, VkFormat depth_format)
89{
90	VkFormat format;
91
92	if (depth_format)
93		format = depth_format;
94	else
95		format = src->format;
96	radv_buffer_view_init(bview, cmd_buffer->device,
97			      &(VkBufferViewCreateInfo) {
98				      .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
99				      .flags = 0,
100				      .buffer = radv_buffer_to_handle(src->buffer),
101				      .format = format,
102				      .offset = src->offset,
103				      .range = VK_WHOLE_SIZE,
104			      });
105
106}
107
108struct blit2d_src_temps {
109	struct radv_image_view iview;
110	struct radv_buffer_view bview;
111};
112
113static void
114blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
115                struct radv_meta_blit2d_surf *src_img,
116                struct radv_meta_blit2d_buffer *src_buf,
117                struct blit2d_src_temps *tmp,
118                enum blit2d_src_type src_type, VkFormat depth_format,
119                VkImageAspectFlagBits aspects,
120                uint32_t log2_samples)
121{
122	struct radv_device *device = cmd_buffer->device;
123
124	if (src_type == BLIT2D_SRC_TYPE_BUFFER) {
125		create_bview(cmd_buffer, src_buf, &tmp->bview, depth_format);
126
127		radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
128					      device->meta_state.blit2d[log2_samples].p_layouts[src_type],
129					      0, /* set */
130					      1, /* descriptorWriteCount */
131					      (VkWriteDescriptorSet[]) {
132					              {
133					                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
134					                      .dstBinding = 0,
135					                      .dstArrayElement = 0,
136					                      .descriptorCount = 1,
137					                      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
138					                      .pTexelBufferView = (VkBufferView[])  { radv_buffer_view_to_handle(&tmp->bview) }
139					              }
140					      });
141
142		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
143				      device->meta_state.blit2d[log2_samples].p_layouts[src_type],
144				      VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
145				      &src_buf->pitch);
146	} else {
147		create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects);
148
149		if (src_type == BLIT2D_SRC_TYPE_IMAGE_3D)
150			radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
151					      device->meta_state.blit2d[log2_samples].p_layouts[src_type],
152					      VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
153					      &src_img->layer);
154
155		radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
156					      device->meta_state.blit2d[log2_samples].p_layouts[src_type],
157					      0, /* set */
158					      1, /* descriptorWriteCount */
159					      (VkWriteDescriptorSet[]) {
160					              {
161					                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
162					                      .dstBinding = 0,
163					                      .dstArrayElement = 0,
164					                      .descriptorCount = 1,
165					                      .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
166					                      .pImageInfo = (VkDescriptorImageInfo[]) {
167					                              {
168					                                      .sampler = VK_NULL_HANDLE,
169					                                      .imageView = radv_image_view_to_handle(&tmp->iview),
170					                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
171					                              },
172					                      }
173					              }
174					      });
175	}
176}
177
178struct blit2d_dst_temps {
179	VkImage image;
180	struct radv_image_view iview;
181	VkFramebuffer fb;
182};
183
184static void
185blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer,
186                struct radv_meta_blit2d_surf *dst,
187                uint32_t width,
188                uint32_t height,
189		VkFormat depth_format,
190                struct blit2d_dst_temps *tmp,
191                VkImageAspectFlagBits aspects)
192{
193	create_iview(cmd_buffer, dst, &tmp->iview, depth_format, aspects);
194
195	radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
196			       &(VkFramebufferCreateInfo) {
197				       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
198					       .attachmentCount = 1,
199					       .pAttachments = (VkImageView[]) {
200					       radv_image_view_to_handle(&tmp->iview),
201				       },
202				       .width = width,
203				       .height = height,
204				       .layers = 1
205				}, &cmd_buffer->pool->alloc, &tmp->fb);
206}
207
208static void
209bind_pipeline(struct radv_cmd_buffer *cmd_buffer,
210              enum blit2d_src_type src_type, unsigned fs_key,
211              uint32_t log2_samples)
212{
213	VkPipeline pipeline =
214		cmd_buffer->device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key];
215
216	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
217			     VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
218}
219
220static void
221bind_depth_pipeline(struct radv_cmd_buffer *cmd_buffer,
222		    enum blit2d_src_type src_type,
223		    uint32_t log2_samples)
224{
225	VkPipeline pipeline =
226		cmd_buffer->device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type];
227
228	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
229			     VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
230}
231
232static void
233bind_stencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
234		      enum blit2d_src_type src_type,
235		      uint32_t log2_samples)
236{
237	VkPipeline pipeline =
238		cmd_buffer->device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type];
239
240	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
241			     VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
242}
243
244static void
245radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
246			    struct radv_meta_blit2d_surf *src_img,
247			    struct radv_meta_blit2d_buffer *src_buf,
248			    struct radv_meta_blit2d_surf *dst,
249			    unsigned num_rects,
250			    struct radv_meta_blit2d_rect *rects, enum blit2d_src_type src_type,
251			    uint32_t log2_samples)
252{
253	struct radv_device *device = cmd_buffer->device;
254
255	for (unsigned r = 0; r < num_rects; ++r) {
256		unsigned i;
257		for_each_bit(i, dst->aspect_mask) {
258			unsigned aspect_mask = 1u << i;
259			VkFormat depth_format = 0;
260			if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
261				depth_format = vk_format_stencil_only(dst->image->vk_format);
262			else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
263				depth_format = vk_format_depth_only(dst->image->vk_format);
264			struct blit2d_src_temps src_temps;
265			blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format, aspect_mask, log2_samples);
266
267			struct blit2d_dst_temps dst_temps;
268			blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
269					rects[r].dst_y + rects[r].height, depth_format, &dst_temps, aspect_mask);
270
271			float vertex_push_constants[4] = {
272				rects[r].src_x,
273				rects[r].src_y,
274				rects[r].src_x + rects[r].width,
275				rects[r].src_y + rects[r].height,
276			};
277
278			radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
279					device->meta_state.blit2d[log2_samples].p_layouts[src_type],
280					VK_SHADER_STAGE_VERTEX_BIT, 0, 16,
281					vertex_push_constants);
282
283			if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
284				unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
285				unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);
286
287				if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) {
288					VkResult ret = blit2d_init_color_pipeline(device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples);
289					if (ret != VK_SUCCESS) {
290						cmd_buffer->record_result = ret;
291						goto fail_pipeline;
292					}
293				}
294
295				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
296							&(VkRenderPassBeginInfo) {
297								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
298									.renderPass = device->meta_state.blit2d_render_passes[fs_key][dst_layout],
299									.framebuffer = dst_temps.fb,
300									.renderArea = {
301									.offset = { rects[r].dst_x, rects[r].dst_y, },
302									.extent = { rects[r].width, rects[r].height },
303								},
304									.clearValueCount = 0,
305										.pClearValues = NULL,
306										}, VK_SUBPASS_CONTENTS_INLINE);
307
308
309				bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples);
310			} else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
311				enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
312
313				if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] == VK_NULL_HANDLE) {
314					VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples);
315					if (ret != VK_SUCCESS) {
316						cmd_buffer->record_result = ret;
317						goto fail_pipeline;
318					}
319				}
320
321				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
322							&(VkRenderPassBeginInfo) {
323								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
324									.renderPass = device->meta_state.blit2d_depth_only_rp[ds_layout],
325									.framebuffer = dst_temps.fb,
326									.renderArea = {
327									.offset = { rects[r].dst_x, rects[r].dst_y, },
328									.extent = { rects[r].width, rects[r].height },
329								},
330									.clearValueCount = 0,
331										.pClearValues = NULL,
332										}, VK_SUBPASS_CONTENTS_INLINE);
333
334
335				bind_depth_pipeline(cmd_buffer, src_type, log2_samples);
336
337			} else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
338				enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
339
340				if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] == VK_NULL_HANDLE) {
341					VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples);
342					if (ret != VK_SUCCESS) {
343						cmd_buffer->record_result = ret;
344						goto fail_pipeline;
345					}
346				}
347
348				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
349							&(VkRenderPassBeginInfo) {
350								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
351									.renderPass = device->meta_state.blit2d_stencil_only_rp[ds_layout],
352									.framebuffer = dst_temps.fb,
353									.renderArea = {
354									.offset = { rects[r].dst_x, rects[r].dst_y, },
355									.extent = { rects[r].width, rects[r].height },
356								},
357									.clearValueCount = 0,
358										.pClearValues = NULL,
359										}, VK_SUBPASS_CONTENTS_INLINE);
360
361
362				bind_stencil_pipeline(cmd_buffer, src_type, log2_samples);
363			} else
364				unreachable("Processing blit2d with multiple aspects.");
365
366			radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
367				.x = rects[r].dst_x,
368				.y = rects[r].dst_y,
369				.width = rects[r].width,
370				.height = rects[r].height,
371				.minDepth = 0.0f,
372				.maxDepth = 1.0f
373			});
374
375			radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
376				.offset = (VkOffset2D) { rects[r].dst_x, rects[r].dst_y },
377				.extent = (VkExtent2D) { rects[r].width, rects[r].height },
378			});
379
380
381
382			radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
383			radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
384
385fail_pipeline:
386			/* At the point where we emit the draw call, all data from the
387			* descriptor sets, etc. has been used.  We are free to delete it.
388			*/
389			radv_DestroyFramebuffer(radv_device_to_handle(device),
390						dst_temps.fb,
391						&cmd_buffer->pool->alloc);
392		}
393	}
394}
395
396void
397radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer,
398		 struct radv_meta_blit2d_surf *src_img,
399		 struct radv_meta_blit2d_buffer *src_buf,
400		 struct radv_meta_blit2d_surf *dst,
401		 unsigned num_rects,
402		 struct radv_meta_blit2d_rect *rects)
403{
404	bool use_3d = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
405		(src_img && src_img->image->type == VK_IMAGE_TYPE_3D);
406	enum blit2d_src_type src_type = src_buf ? BLIT2D_SRC_TYPE_BUFFER :
407		use_3d ? BLIT2D_SRC_TYPE_IMAGE_3D : BLIT2D_SRC_TYPE_IMAGE;
408	radv_meta_blit2d_normal_dst(cmd_buffer, src_img, src_buf, dst,
409				    num_rects, rects, src_type,
410				    src_img ? util_logbase2(src_img->image->info.samples) : 0);
411}
412
413static nir_shader *
414build_nir_vertex_shader(void)
415{
416	const struct glsl_type *vec4 = glsl_vec4_type();
417	const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
418	nir_builder b;
419
420	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
421	b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_vs");
422
423	nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
424						    vec4, "gl_Position");
425	pos_out->data.location = VARYING_SLOT_POS;
426
427	nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
428							vec2, "v_tex_pos");
429	tex_pos_out->data.location = VARYING_SLOT_VAR0;
430	tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
431
432	nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
433	nir_store_var(&b, pos_out, outvec, 0xf);
434
435	nir_intrinsic_instr *src_box = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
436	src_box->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
437	nir_intrinsic_set_base(src_box, 0);
438	nir_intrinsic_set_range(src_box, 16);
439	src_box->num_components = 4;
440	nir_ssa_dest_init(&src_box->instr, &src_box->dest, 4, 32, "src_box");
441	nir_builder_instr_insert(&b, &src_box->instr);
442
443	nir_intrinsic_instr *vertex_id = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_vertex_id_zero_base);
444	nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
445	nir_builder_instr_insert(&b, &vertex_id->instr);
446
447	/* vertex 0 - src_x, src_y */
448	/* vertex 1 - src_x, src_y+h */
449	/* vertex 2 - src_x+w, src_y */
450	/* so channel 0 is vertex_id != 2 ? src_x : src_x + w
451	   channel 1 is vertex id != 1 ? src_y : src_y + w */
452
453	nir_ssa_def *c0cmp = nir_ine(&b, &vertex_id->dest.ssa,
454				     nir_imm_int(&b, 2));
455	nir_ssa_def *c1cmp = nir_ine(&b, &vertex_id->dest.ssa,
456				     nir_imm_int(&b, 1));
457
458	nir_ssa_def *comp[2];
459	comp[0] = nir_bcsel(&b, c0cmp,
460			    nir_channel(&b, &src_box->dest.ssa, 0),
461			    nir_channel(&b, &src_box->dest.ssa, 2));
462
463	comp[1] = nir_bcsel(&b, c1cmp,
464			    nir_channel(&b, &src_box->dest.ssa, 1),
465			    nir_channel(&b, &src_box->dest.ssa, 3));
466	nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 2);
467	nir_store_var(&b, tex_pos_out, out_tex_vec, 0x3);
468	return b.shader;
469}
470
471typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *,
472                                               struct radv_device *,
473                                               nir_ssa_def *, bool, bool);
474
475static nir_ssa_def *
476build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device,
477                      nir_ssa_def *tex_pos, bool is_3d, bool is_multisampled)
478{
479	enum glsl_sampler_dim dim =
480		is_3d ? GLSL_SAMPLER_DIM_3D : is_multisampled ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
481	const struct glsl_type *sampler_type =
482		glsl_sampler_type(dim, false, false, GLSL_TYPE_UINT);
483	nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
484						    sampler_type, "s_tex");
485	sampler->data.descriptor_set = 0;
486	sampler->data.binding = 0;
487
488	nir_ssa_def *tex_pos_3d = NULL;
489	nir_intrinsic_instr *sample_idx = NULL;
490	if (is_3d) {
491		nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
492		nir_intrinsic_set_base(layer, 16);
493		nir_intrinsic_set_range(layer, 4);
494		layer->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
495		layer->num_components = 1;
496		nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
497		nir_builder_instr_insert(b, &layer->instr);
498
499		nir_ssa_def *chans[3];
500		chans[0] = nir_channel(b, tex_pos, 0);
501		chans[1] = nir_channel(b, tex_pos, 1);
502		chans[2] = &layer->dest.ssa;
503		tex_pos_3d = nir_vec(b, chans, 3);
504	}
505	if (is_multisampled) {
506		sample_idx = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_sample_id);
507		sample_idx->num_components = 1;
508		nir_ssa_dest_init(&sample_idx->instr, &sample_idx->dest, 1, 32, "sample_idx");
509		nir_builder_instr_insert(b, &sample_idx->instr);
510	}
511
512	nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
513
514	nir_tex_instr *tex = nir_tex_instr_create(b->shader, is_multisampled ? 4 : 3);
515	tex->sampler_dim = dim;
516	tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
517	tex->src[0].src_type = nir_tex_src_coord;
518	tex->src[0].src = nir_src_for_ssa(is_3d ? tex_pos_3d : tex_pos);
519	tex->src[1].src_type = is_multisampled ? nir_tex_src_ms_index : nir_tex_src_lod;
520	tex->src[1].src = nir_src_for_ssa(is_multisampled ? &sample_idx->dest.ssa : nir_imm_int(b, 0));
521	tex->src[2].src_type = nir_tex_src_texture_deref;
522	tex->src[2].src = nir_src_for_ssa(tex_deref);
523	if (is_multisampled) {
524		tex->src[3].src_type = nir_tex_src_lod;
525		tex->src[3].src = nir_src_for_ssa(nir_imm_int(b, 0));
526	}
527	tex->dest_type = nir_type_uint;
528	tex->is_array = false;
529	tex->coord_components = is_3d ? 3 : 2;
530
531	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
532	nir_builder_instr_insert(b, &tex->instr);
533
534	return &tex->dest.ssa;
535}
536
537
538static nir_ssa_def *
539build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device,
540		       nir_ssa_def *tex_pos, bool is_3d, bool is_multisampled)
541{
542	const struct glsl_type *sampler_type =
543		glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
544	nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
545						    sampler_type, "s_tex");
546	sampler->data.descriptor_set = 0;
547	sampler->data.binding = 0;
548
549	nir_intrinsic_instr *width = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
550	nir_intrinsic_set_base(width, 16);
551	nir_intrinsic_set_range(width, 4);
552	width->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
553	width->num_components = 1;
554	nir_ssa_dest_init(&width->instr, &width->dest, 1, 32, "width");
555	nir_builder_instr_insert(b, &width->instr);
556
557	nir_ssa_def *pos_x = nir_channel(b, tex_pos, 0);
558	nir_ssa_def *pos_y = nir_channel(b, tex_pos, 1);
559	pos_y = nir_imul(b, pos_y, &width->dest.ssa);
560	pos_x = nir_iadd(b, pos_x, pos_y);
561
562	nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
563
564	nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
565	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
566	tex->op = nir_texop_txf;
567	tex->src[0].src_type = nir_tex_src_coord;
568	tex->src[0].src = nir_src_for_ssa(pos_x);
569	tex->src[1].src_type = nir_tex_src_texture_deref;
570	tex->src[1].src = nir_src_for_ssa(tex_deref);
571	tex->dest_type = nir_type_uint;
572	tex->is_array = false;
573	tex->coord_components = 1;
574
575	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
576	nir_builder_instr_insert(b, &tex->instr);
577
578	return &tex->dest.ssa;
579}
580
581static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
582	.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
583	.vertexBindingDescriptionCount = 0,
584	.vertexAttributeDescriptionCount = 0,
585};
586
587static nir_shader *
588build_nir_copy_fragment_shader(struct radv_device *device,
589                               texel_fetch_build_func txf_func, const char* name, bool is_3d,
590                               bool is_multisampled)
591{
592	const struct glsl_type *vec4 = glsl_vec4_type();
593	const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
594	nir_builder b;
595
596	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
597	b.shader->info.name = ralloc_strdup(b.shader, name);
598
599	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
600						       vec2, "v_tex_pos");
601	tex_pos_in->data.location = VARYING_SLOT_VAR0;
602
603	nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
604						      vec4, "f_color");
605	color_out->data.location = FRAG_RESULT_DATA0;
606
607	nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
608	nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
609
610	nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
611	nir_store_var(&b, color_out, color, 0xf);
612
613	return b.shader;
614}
615
616static nir_shader *
617build_nir_copy_fragment_shader_depth(struct radv_device *device,
618				     texel_fetch_build_func txf_func, const char* name, bool is_3d,
619				     bool is_multisampled)
620{
621	const struct glsl_type *vec4 = glsl_vec4_type();
622	const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
623	nir_builder b;
624
625	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
626	b.shader->info.name = ralloc_strdup(b.shader, name);
627
628	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
629						       vec2, "v_tex_pos");
630	tex_pos_in->data.location = VARYING_SLOT_VAR0;
631
632	nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
633						      vec4, "f_color");
634	color_out->data.location = FRAG_RESULT_DEPTH;
635
636	nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
637	nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
638
639	nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
640	nir_store_var(&b, color_out, color, 0x1);
641
642	return b.shader;
643}
644
645static nir_shader *
646build_nir_copy_fragment_shader_stencil(struct radv_device *device,
647				       texel_fetch_build_func txf_func, const char* name, bool is_3d,
648				       bool is_multisampled)
649{
650	const struct glsl_type *vec4 = glsl_vec4_type();
651	const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
652	nir_builder b;
653
654	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
655	b.shader->info.name = ralloc_strdup(b.shader, name);
656
657	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
658						       vec2, "v_tex_pos");
659	tex_pos_in->data.location = VARYING_SLOT_VAR0;
660
661	nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
662						      vec4, "f_color");
663	color_out->data.location = FRAG_RESULT_STENCIL;
664
665	nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
666	nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
667
668	nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
669	nir_store_var(&b, color_out, color, 0x1);
670
671	return b.shader;
672}
673
674void
675radv_device_finish_meta_blit2d_state(struct radv_device *device)
676{
677	struct radv_meta_state *state = &device->meta_state;
678
679	for(unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
680		for (unsigned k = 0; k < RADV_META_DST_LAYOUT_COUNT; ++k) {
681			radv_DestroyRenderPass(radv_device_to_handle(device),
682					       state->blit2d_render_passes[j][k],
683					       &state->alloc);
684		}
685	}
686
687	for (enum radv_blit_ds_layout j = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; j < RADV_BLIT_DS_LAYOUT_COUNT; j++) {
688		radv_DestroyRenderPass(radv_device_to_handle(device),
689				       state->blit2d_depth_only_rp[j], &state->alloc);
690		radv_DestroyRenderPass(radv_device_to_handle(device),
691				       state->blit2d_stencil_only_rp[j], &state->alloc);
692	}
693
694	for (unsigned log2_samples = 0; log2_samples < 1 + MAX_SAMPLES_LOG2; ++log2_samples) {
695		for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
696			radv_DestroyPipelineLayout(radv_device_to_handle(device),
697						   state->blit2d[log2_samples].p_layouts[src],
698						   &state->alloc);
699			radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
700							state->blit2d[log2_samples].ds_layouts[src],
701							&state->alloc);
702
703			for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
704				radv_DestroyPipeline(radv_device_to_handle(device),
705						     state->blit2d[log2_samples].pipelines[src][j],
706						     &state->alloc);
707			}
708
709			radv_DestroyPipeline(radv_device_to_handle(device),
710					     state->blit2d[log2_samples].depth_only_pipeline[src],
711					     &state->alloc);
712			radv_DestroyPipeline(radv_device_to_handle(device),
713					     state->blit2d[log2_samples].stencil_only_pipeline[src],
714					     &state->alloc);
715		}
716	}
717}
718
719static VkResult
720blit2d_init_color_pipeline(struct radv_device *device,
721			   enum blit2d_src_type src_type,
722			   VkFormat format,
723			   uint32_t log2_samples)
724{
725	VkResult result;
726	unsigned fs_key = radv_format_meta_fs_key(format);
727	const char *name;
728
729	mtx_lock(&device->meta_state.mtx);
730	if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]) {
731		mtx_unlock(&device->meta_state.mtx);
732		return VK_SUCCESS;
733	}
734
735	texel_fetch_build_func src_func;
736	switch(src_type) {
737	case BLIT2D_SRC_TYPE_IMAGE:
738		src_func = build_nir_texel_fetch;
739		name = "meta_blit2d_image_fs";
740		break;
741	case BLIT2D_SRC_TYPE_IMAGE_3D:
742		src_func = build_nir_texel_fetch;
743		name = "meta_blit3d_image_fs";
744		break;
745	case BLIT2D_SRC_TYPE_BUFFER:
746		src_func = build_nir_buffer_fetch;
747		name = "meta_blit2d_buffer_fs";
748		break;
749	default:
750		unreachable("unknown blit src type\n");
751		break;
752	}
753
754	const VkPipelineVertexInputStateCreateInfo *vi_create_info;
755	struct radv_shader_module fs = { .nir = NULL };
756
757
758	fs.nir = build_nir_copy_fragment_shader(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
759	vi_create_info = &normal_vi_create_info;
760
761	struct radv_shader_module vs = {
762		.nir = build_nir_vertex_shader(),
763	};
764
765	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
766		{
767			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
768			.stage = VK_SHADER_STAGE_VERTEX_BIT,
769			.module = radv_shader_module_to_handle(&vs),
770			.pName = "main",
771			.pSpecializationInfo = NULL
772		}, {
773			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
774			.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
775			.module = radv_shader_module_to_handle(&fs),
776			.pName = "main",
777			.pSpecializationInfo = NULL
778		},
779	};
780
781	for (unsigned dst_layout = 0; dst_layout < RADV_META_DST_LAYOUT_COUNT; ++dst_layout) {
782		if (!device->meta_state.blit2d_render_passes[fs_key][dst_layout]) {
783			VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
784
785			result = radv_CreateRenderPass(radv_device_to_handle(device),
786						&(VkRenderPassCreateInfo) {
787							.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
788							.attachmentCount = 1,
789							.pAttachments = &(VkAttachmentDescription) {
790							.format = format,
791							.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
792							.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
793							.initialLayout = layout,
794							.finalLayout = layout,
795							},
796						.subpassCount = 1,
797						.pSubpasses = &(VkSubpassDescription) {
798							.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
799							.inputAttachmentCount = 0,
800							.colorAttachmentCount = 1,
801							.pColorAttachments = &(VkAttachmentReference) {
802								.attachment = 0,
803								.layout = layout,
804								},
805						.pResolveAttachments = NULL,
806						.pDepthStencilAttachment = &(VkAttachmentReference) {
807							.attachment = VK_ATTACHMENT_UNUSED,
808							.layout = layout,
809						},
810						.preserveAttachmentCount = 1,
811						.pPreserveAttachments = (uint32_t[]) { 0 },
812						},
813						.dependencyCount = 0,
814					}, &device->meta_state.alloc, &device->meta_state.blit2d_render_passes[fs_key][dst_layout]);
815		}
816	}
817
818	const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
819		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
820		.stageCount = ARRAY_SIZE(pipeline_shader_stages),
821		.pStages = pipeline_shader_stages,
822		.pVertexInputState = vi_create_info,
823		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
824			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
825			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
826			.primitiveRestartEnable = false,
827		},
828		.pViewportState = &(VkPipelineViewportStateCreateInfo) {
829			.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
830			.viewportCount = 1,
831			.scissorCount = 1,
832		},
833		.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
834			.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
835			.rasterizerDiscardEnable = false,
836			.polygonMode = VK_POLYGON_MODE_FILL,
837			.cullMode = VK_CULL_MODE_NONE,
838			.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
839		},
840		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
841			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
842			.rasterizationSamples = 1 << log2_samples,
843			.sampleShadingEnable = log2_samples > 1,
844			.minSampleShading = 1.0,
845			.pSampleMask = (VkSampleMask[]) { UINT32_MAX },
846		},
847		.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
848			.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
849			.attachmentCount = 1,
850			.pAttachments = (VkPipelineColorBlendAttachmentState []) {
851				{ .colorWriteMask =
852				  VK_COLOR_COMPONENT_A_BIT |
853				  VK_COLOR_COMPONENT_R_BIT |
854				  VK_COLOR_COMPONENT_G_BIT |
855				  VK_COLOR_COMPONENT_B_BIT },
856			}
857		},
858		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
859			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
860			.dynamicStateCount = 9,
861			.pDynamicStates = (VkDynamicState[]) {
862				VK_DYNAMIC_STATE_VIEWPORT,
863				VK_DYNAMIC_STATE_SCISSOR,
864				VK_DYNAMIC_STATE_LINE_WIDTH,
865				VK_DYNAMIC_STATE_DEPTH_BIAS,
866				VK_DYNAMIC_STATE_BLEND_CONSTANTS,
867				VK_DYNAMIC_STATE_DEPTH_BOUNDS,
868				VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
869				VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
870				VK_DYNAMIC_STATE_STENCIL_REFERENCE,
871			},
872		},
873		.flags = 0,
874		.layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
875		.renderPass = device->meta_state.blit2d_render_passes[fs_key][0],
876		.subpass = 0,
877	};
878
879	const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
880		.use_rectlist = true
881	};
882
883	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
884					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
885					       &vk_pipeline_info, &radv_pipeline_info,
886					       &device->meta_state.alloc,
887					       &device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]);
888
889
890	ralloc_free(vs.nir);
891	ralloc_free(fs.nir);
892
893	mtx_unlock(&device->meta_state.mtx);
894	return result;
895}
896
897static VkResult
898blit2d_init_depth_only_pipeline(struct radv_device *device,
899				enum blit2d_src_type src_type,
900				uint32_t log2_samples)
901{
902	VkResult result;
903	const char *name;
904
905	mtx_lock(&device->meta_state.mtx);
906	if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]) {
907		mtx_unlock(&device->meta_state.mtx);
908		return VK_SUCCESS;
909	}
910
911	texel_fetch_build_func src_func;
912	switch(src_type) {
913	case BLIT2D_SRC_TYPE_IMAGE:
914		src_func = build_nir_texel_fetch;
915		name = "meta_blit2d_depth_image_fs";
916		break;
917	case BLIT2D_SRC_TYPE_IMAGE_3D:
918		src_func = build_nir_texel_fetch;
919		name = "meta_blit3d_depth_image_fs";
920		break;
921	case BLIT2D_SRC_TYPE_BUFFER:
922		src_func = build_nir_buffer_fetch;
923		name = "meta_blit2d_depth_buffer_fs";
924		break;
925	default:
926		unreachable("unknown blit src type\n");
927		break;
928	}
929
930	const VkPipelineVertexInputStateCreateInfo *vi_create_info;
931	struct radv_shader_module fs = { .nir = NULL };
932
933	fs.nir = build_nir_copy_fragment_shader_depth(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
934	vi_create_info = &normal_vi_create_info;
935
936	struct radv_shader_module vs = {
937		.nir = build_nir_vertex_shader(),
938	};
939
940	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
941		{
942			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
943			.stage = VK_SHADER_STAGE_VERTEX_BIT,
944			.module = radv_shader_module_to_handle(&vs),
945			.pName = "main",
946			.pSpecializationInfo = NULL
947		}, {
948			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
949			.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
950			.module = radv_shader_module_to_handle(&fs),
951			.pName = "main",
952			.pSpecializationInfo = NULL
953		},
954	};
955
956	for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
957		if (!device->meta_state.blit2d_depth_only_rp[ds_layout]) {
958			VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
959			result = radv_CreateRenderPass(radv_device_to_handle(device),
960						       &(VkRenderPassCreateInfo) {
961							       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
962							       .attachmentCount = 1,
963							       .pAttachments = &(VkAttachmentDescription) {
964								       .format = VK_FORMAT_D32_SFLOAT,
965								       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
966								       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
967								       .initialLayout = layout,
968								       .finalLayout = layout,
969							       },
970							       .subpassCount = 1,
971							       .pSubpasses = &(VkSubpassDescription) {
972								       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
973								       .inputAttachmentCount = 0,
974								       .colorAttachmentCount = 0,
975								       .pColorAttachments = NULL,
976								       .pResolveAttachments = NULL,
977								       .pDepthStencilAttachment = &(VkAttachmentReference) {
978									       .attachment = 0,
979									       .layout = layout,
980								       },
981								       .preserveAttachmentCount = 1,
982								       .pPreserveAttachments = (uint32_t[]) { 0 },
983							       },
984							       .dependencyCount = 0,
985							}, &device->meta_state.alloc, &device->meta_state.blit2d_depth_only_rp[ds_layout]);
986		}
987	}
988
989	const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
990		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
991		.stageCount = ARRAY_SIZE(pipeline_shader_stages),
992		.pStages = pipeline_shader_stages,
993		.pVertexInputState = vi_create_info,
994		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
995			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
996			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
997			.primitiveRestartEnable = false,
998		},
999		.pViewportState = &(VkPipelineViewportStateCreateInfo) {
1000			.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
1001			.viewportCount = 1,
1002			.scissorCount = 1,
1003		},
1004		.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
1005			.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
1006			.rasterizerDiscardEnable = false,
1007			.polygonMode = VK_POLYGON_MODE_FILL,
1008			.cullMode = VK_CULL_MODE_NONE,
1009			.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
1010		},
1011		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
1012			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
1013			.rasterizationSamples = 1 << log2_samples,
1014			.sampleShadingEnable = false,
1015			.pSampleMask = (VkSampleMask[]) { UINT32_MAX },
1016		},
1017		.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
1018			.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
1019			.attachmentCount = 0,
1020			.pAttachments = NULL,
1021		},
1022		.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
1023			.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
1024			.depthTestEnable = true,
1025			.depthWriteEnable = true,
1026			.depthCompareOp = VK_COMPARE_OP_ALWAYS,
1027		},
1028		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
1029			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
1030			.dynamicStateCount = 9,
1031			.pDynamicStates = (VkDynamicState[]) {
1032				VK_DYNAMIC_STATE_VIEWPORT,
1033				VK_DYNAMIC_STATE_SCISSOR,
1034				VK_DYNAMIC_STATE_LINE_WIDTH,
1035				VK_DYNAMIC_STATE_DEPTH_BIAS,
1036				VK_DYNAMIC_STATE_BLEND_CONSTANTS,
1037				VK_DYNAMIC_STATE_DEPTH_BOUNDS,
1038				VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
1039				VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
1040				VK_DYNAMIC_STATE_STENCIL_REFERENCE,
1041			},
1042		},
1043		.flags = 0,
1044		.layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
1045		.renderPass = device->meta_state.blit2d_depth_only_rp[0],
1046		.subpass = 0,
1047	};
1048
1049	const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
1050		.use_rectlist = true
1051	};
1052
1053	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
1054					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
1055					       &vk_pipeline_info, &radv_pipeline_info,
1056					       &device->meta_state.alloc,
1057					       &device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]);
1058
1059
1060	ralloc_free(vs.nir);
1061	ralloc_free(fs.nir);
1062
1063	mtx_unlock(&device->meta_state.mtx);
1064	return result;
1065}
1066
1067static VkResult
1068blit2d_init_stencil_only_pipeline(struct radv_device *device,
1069				  enum blit2d_src_type src_type,
1070				  uint32_t log2_samples)
1071{
1072	VkResult result;
1073	const char *name;
1074
1075	mtx_lock(&device->meta_state.mtx);
1076	if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]) {
1077		mtx_unlock(&device->meta_state.mtx);
1078		return VK_SUCCESS;
1079	}
1080
1081	texel_fetch_build_func src_func;
1082	switch(src_type) {
1083	case BLIT2D_SRC_TYPE_IMAGE:
1084		src_func = build_nir_texel_fetch;
1085		name = "meta_blit2d_stencil_image_fs";
1086		break;
1087	case BLIT2D_SRC_TYPE_IMAGE_3D:
1088		src_func = build_nir_texel_fetch;
1089		name = "meta_blit3d_stencil_image_fs";
1090		break;
1091	case BLIT2D_SRC_TYPE_BUFFER:
1092		src_func = build_nir_buffer_fetch;
1093		name = "meta_blit2d_stencil_buffer_fs";
1094		break;
1095	default:
1096		unreachable("unknown blit src type\n");
1097		break;
1098	}
1099
1100	const VkPipelineVertexInputStateCreateInfo *vi_create_info;
1101	struct radv_shader_module fs = { .nir = NULL };
1102
1103	fs.nir = build_nir_copy_fragment_shader_stencil(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
1104	vi_create_info = &normal_vi_create_info;
1105
1106	struct radv_shader_module vs = {
1107		.nir = build_nir_vertex_shader(),
1108	};
1109
1110	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
1111		{
1112			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1113			.stage = VK_SHADER_STAGE_VERTEX_BIT,
1114			.module = radv_shader_module_to_handle(&vs),
1115			.pName = "main",
1116			.pSpecializationInfo = NULL
1117		}, {
1118			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1119			.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
1120			.module = radv_shader_module_to_handle(&fs),
1121			.pName = "main",
1122			.pSpecializationInfo = NULL
1123		},
1124	};
1125
1126	for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
1127		if (!device->meta_state.blit2d_stencil_only_rp[ds_layout]) {
1128			VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
1129			result = radv_CreateRenderPass(radv_device_to_handle(device),
1130						       &(VkRenderPassCreateInfo) {
1131							       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
1132							       .attachmentCount = 1,
1133							       .pAttachments = &(VkAttachmentDescription) {
1134								       .format = VK_FORMAT_S8_UINT,
1135								       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
1136								       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
1137								       .initialLayout = layout,
1138								       .finalLayout = layout,
1139							       },
1140							       .subpassCount = 1,
1141							       .pSubpasses = &(VkSubpassDescription) {
1142								       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
1143								       .inputAttachmentCount = 0,
1144								       .colorAttachmentCount = 0,
1145								       .pColorAttachments = NULL,
1146								       .pResolveAttachments = NULL,
1147								       .pDepthStencilAttachment = &(VkAttachmentReference) {
1148									       .attachment = 0,
1149									       .layout = layout,
1150								       },
1151								       .preserveAttachmentCount = 1,
1152								       .pPreserveAttachments = (uint32_t[]) { 0 },
1153							       },
1154							       .dependencyCount = 0,
1155						       }, &device->meta_state.alloc, &device->meta_state.blit2d_stencil_only_rp[ds_layout]);
1156		}
1157	}
1158
1159	const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
1160		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
1161		.stageCount = ARRAY_SIZE(pipeline_shader_stages),
1162		.pStages = pipeline_shader_stages,
1163		.pVertexInputState = vi_create_info,
1164		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
1165			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
1166			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
1167			.primitiveRestartEnable = false,
1168		},
1169		.pViewportState = &(VkPipelineViewportStateCreateInfo) {
1170			.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
1171			.viewportCount = 1,
1172			.scissorCount = 1,
1173		},
1174		.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
1175			.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
1176			.rasterizerDiscardEnable = false,
1177			.polygonMode = VK_POLYGON_MODE_FILL,
1178			.cullMode = VK_CULL_MODE_NONE,
1179			.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
1180		},
1181		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
1182			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
1183			.rasterizationSamples = 1 << log2_samples,
1184			.sampleShadingEnable = false,
1185			.pSampleMask = (VkSampleMask[]) { UINT32_MAX },
1186		},
1187		.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
1188			.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
1189			.attachmentCount = 0,
1190			.pAttachments = NULL,
1191		},
1192		.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
1193			.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
1194			.depthTestEnable = false,
1195			.depthWriteEnable = false,
1196			.stencilTestEnable = true,
1197			.front = {
1198				.failOp = VK_STENCIL_OP_REPLACE,
1199				.passOp = VK_STENCIL_OP_REPLACE,
1200				.depthFailOp = VK_STENCIL_OP_REPLACE,
1201				.compareOp = VK_COMPARE_OP_ALWAYS,
1202				.compareMask = 0xff,
1203				.writeMask = 0xff,
1204				.reference = 0
1205			},
1206			.back = {
1207				.failOp = VK_STENCIL_OP_REPLACE,
1208				.passOp = VK_STENCIL_OP_REPLACE,
1209				.depthFailOp = VK_STENCIL_OP_REPLACE,
1210				.compareOp = VK_COMPARE_OP_ALWAYS,
1211				.compareMask = 0xff,
1212				.writeMask = 0xff,
1213				.reference = 0
1214			},
1215			.depthCompareOp = VK_COMPARE_OP_ALWAYS,
1216		},
1217		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
1218			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
1219			.dynamicStateCount = 6,
1220			.pDynamicStates = (VkDynamicState[]) {
1221				VK_DYNAMIC_STATE_VIEWPORT,
1222				VK_DYNAMIC_STATE_SCISSOR,
1223				VK_DYNAMIC_STATE_LINE_WIDTH,
1224				VK_DYNAMIC_STATE_DEPTH_BIAS,
1225				VK_DYNAMIC_STATE_BLEND_CONSTANTS,
1226				VK_DYNAMIC_STATE_DEPTH_BOUNDS,
1227			},
1228		},
1229		.flags = 0,
1230		.layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
1231		.renderPass = device->meta_state.blit2d_stencil_only_rp[0],
1232		.subpass = 0,
1233	};
1234
1235	const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
1236		.use_rectlist = true
1237	};
1238
1239	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
1240					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
1241					       &vk_pipeline_info, &radv_pipeline_info,
1242					       &device->meta_state.alloc,
1243					       &device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]);
1244
1245
1246	ralloc_free(vs.nir);
1247	ralloc_free(fs.nir);
1248
1249	mtx_unlock(&device->meta_state.mtx);
1250	return result;
1251}
1252
1253static VkResult
1254meta_blit2d_create_pipe_layout(struct radv_device *device,
1255			       int idx,
1256			       uint32_t log2_samples)
1257{
1258	VkResult result;
1259	VkDescriptorType desc_type = (idx == BLIT2D_SRC_TYPE_BUFFER) ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
1260	const VkPushConstantRange push_constant_ranges[] = {
1261		{VK_SHADER_STAGE_VERTEX_BIT, 0, 16},
1262		{VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4},
1263	};
1264	int num_push_constant_range = (idx != BLIT2D_SRC_TYPE_IMAGE || log2_samples > 0) ? 2 : 1;
1265
1266	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1267						&(VkDescriptorSetLayoutCreateInfo) {
1268							.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1269							.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1270							.bindingCount = 1,
1271							.pBindings = (VkDescriptorSetLayoutBinding[]) {
1272							{
1273								.binding = 0,
1274								.descriptorType = desc_type,
1275								.descriptorCount = 1,
1276								.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
1277								.pImmutableSamplers = NULL
1278							},
1279							}
1280						}, &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].ds_layouts[idx]);
1281	if (result != VK_SUCCESS)
1282		goto fail;
1283
1284	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1285					   &(VkPipelineLayoutCreateInfo) {
1286						   .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1287							   .setLayoutCount = 1,
1288							   .pSetLayouts = &device->meta_state.blit2d[log2_samples].ds_layouts[idx],
1289							   .pushConstantRangeCount = num_push_constant_range,
1290							   .pPushConstantRanges = push_constant_ranges,
1291							   },
1292					   &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].p_layouts[idx]);
1293	if (result != VK_SUCCESS)
1294		goto fail;
1295	return VK_SUCCESS;
1296fail:
1297	return result;
1298}
1299
1300VkResult
1301radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand)
1302{
1303	VkResult result;
1304	bool create_3d = device->physical_device->rad_info.chip_class >= GFX9;
1305
1306	for (unsigned log2_samples = 0; log2_samples < 1 + MAX_SAMPLES_LOG2; log2_samples++) {
1307		for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
1308			if (src == BLIT2D_SRC_TYPE_IMAGE_3D && !create_3d)
1309				continue;
1310
1311			/* Don't need to handle copies between buffers and multisample images. */
1312			if (src == BLIT2D_SRC_TYPE_BUFFER && log2_samples > 0)
1313				continue;
1314
1315			result = meta_blit2d_create_pipe_layout(device, src, log2_samples);
1316			if (result != VK_SUCCESS)
1317				goto fail;
1318
1319			if (on_demand)
1320				continue;
1321
1322			for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
1323				result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j], log2_samples);
1324				if (result != VK_SUCCESS)
1325					goto fail;
1326			}
1327
1328			result = blit2d_init_depth_only_pipeline(device, src, log2_samples);
1329			if (result != VK_SUCCESS)
1330				goto fail;
1331
1332			result = blit2d_init_stencil_only_pipeline(device, src, log2_samples);
1333			if (result != VK_SUCCESS)
1334				goto fail;
1335		}
1336	}
1337
1338	return VK_SUCCESS;
1339
1340fail:
1341	radv_device_finish_meta_blit2d_state(device);
1342	return result;
1343}
1344