/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23b8e80941Smrg
24b8e80941Smrg#include "util/u_blitter.h"
25b8e80941Smrg#include "util/u_prim.h"
26b8e80941Smrg#include "util/u_format.h"
27b8e80941Smrg#include "util/u_pack_color.h"
28b8e80941Smrg#include "util/u_prim_restart.h"
29b8e80941Smrg#include "util/u_upload_mgr.h"
30b8e80941Smrg#include "indices/u_primconvert.h"
31b8e80941Smrg
32b8e80941Smrg#include "v3d_context.h"
33b8e80941Smrg#include "v3d_resource.h"
34b8e80941Smrg#include "v3d_cl.h"
35b8e80941Smrg#include "broadcom/compiler/v3d_compiler.h"
36b8e80941Smrg#include "broadcom/common/v3d_macros.h"
37b8e80941Smrg#include "broadcom/cle/v3dx_pack.h"
38b8e80941Smrg
39b8e80941Smrg/**
40b8e80941Smrg * Does the initial bining command list setup for drawing to a given FBO.
41b8e80941Smrg */
42b8e80941Smrgstatic void
43b8e80941Smrgv3d_start_draw(struct v3d_context *v3d)
44b8e80941Smrg{
45b8e80941Smrg        struct v3d_job *job = v3d->job;
46b8e80941Smrg
47b8e80941Smrg        if (job->needs_flush)
48b8e80941Smrg                return;
49b8e80941Smrg
50b8e80941Smrg        /* Get space to emit our BCL state, using a branch to jump to a new BO
51b8e80941Smrg         * if necessary.
52b8e80941Smrg         */
53b8e80941Smrg        v3d_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
54b8e80941Smrg
55b8e80941Smrg        job->submit.bcl_start = job->bcl.bo->offset;
56b8e80941Smrg        v3d_job_add_bo(job, job->bcl.bo);
57b8e80941Smrg
58b8e80941Smrg        /* The PTB will request the tile alloc initial size per tile at start
59b8e80941Smrg         * of tile binning.
60b8e80941Smrg         */
61b8e80941Smrg        uint32_t tile_alloc_size = (job->draw_tiles_x *
62b8e80941Smrg                                    job->draw_tiles_y) * 64;
63b8e80941Smrg        /* The PTB allocates in aligned 4k chunks after the initial setup. */
64b8e80941Smrg        tile_alloc_size = align(tile_alloc_size, 4096);
65b8e80941Smrg
66b8e80941Smrg        /* Include the first two chunk allocations that the PTB does so that
67b8e80941Smrg         * we definitely clear the OOM condition before triggering one (the HW
68b8e80941Smrg         * won't trigger OOM during the first allocations).
69b8e80941Smrg         */
70b8e80941Smrg        tile_alloc_size += 8192;
71b8e80941Smrg
72b8e80941Smrg        /* For performance, allocate some extra initial memory after the PTB's
73b8e80941Smrg         * minimal allocations, so that we hopefully don't have to block the
74b8e80941Smrg         * GPU on the kernel handling an OOM signal.
75b8e80941Smrg         */
76b8e80941Smrg        tile_alloc_size += 512 * 1024;
77b8e80941Smrg
78b8e80941Smrg        job->tile_alloc = v3d_bo_alloc(v3d->screen, tile_alloc_size,
79b8e80941Smrg                                       "tile_alloc");
80b8e80941Smrg        uint32_t tsda_per_tile_size = v3d->screen->devinfo.ver >= 40 ? 256 : 64;
81b8e80941Smrg        job->tile_state = v3d_bo_alloc(v3d->screen,
82b8e80941Smrg                                       job->draw_tiles_y *
83b8e80941Smrg                                       job->draw_tiles_x *
84b8e80941Smrg                                       tsda_per_tile_size,
85b8e80941Smrg                                       "TSDA");
86b8e80941Smrg
87b8e80941Smrg#if V3D_VERSION >= 40
88b8e80941Smrg        cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
89b8e80941Smrg                config.width_in_pixels = v3d->framebuffer.width;
90b8e80941Smrg                config.height_in_pixels = v3d->framebuffer.height;
91b8e80941Smrg                config.number_of_render_targets =
92b8e80941Smrg                        MAX2(v3d->framebuffer.nr_cbufs, 1);
93b8e80941Smrg
94b8e80941Smrg                config.multisample_mode_4x = job->msaa;
95b8e80941Smrg
96b8e80941Smrg                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
97b8e80941Smrg        }
98b8e80941Smrg#else /* V3D_VERSION < 40 */
99b8e80941Smrg        /* "Binning mode lists start with a Tile Binning Mode Configuration
100b8e80941Smrg         * item (120)"
101b8e80941Smrg         *
102b8e80941Smrg         * Part1 signals the end of binning config setup.
103b8e80941Smrg         */
104b8e80941Smrg        cl_emit(&job->bcl, TILE_BINNING_MODE_CFG_PART2, config) {
105b8e80941Smrg                config.tile_allocation_memory_address =
106b8e80941Smrg                        cl_address(job->tile_alloc, 0);
107b8e80941Smrg                config.tile_allocation_memory_size = job->tile_alloc->size;
108b8e80941Smrg        }
109b8e80941Smrg
110b8e80941Smrg        cl_emit(&job->bcl, TILE_BINNING_MODE_CFG_PART1, config) {
111b8e80941Smrg                config.tile_state_data_array_base_address =
112b8e80941Smrg                        cl_address(job->tile_state, 0);
113b8e80941Smrg
114b8e80941Smrg                config.width_in_tiles = job->draw_tiles_x;
115b8e80941Smrg                config.height_in_tiles = job->draw_tiles_y;
116b8e80941Smrg                /* Must be >= 1 */
117b8e80941Smrg                config.number_of_render_targets =
118b8e80941Smrg                        MAX2(v3d->framebuffer.nr_cbufs, 1);
119b8e80941Smrg
120b8e80941Smrg                config.multisample_mode_4x = job->msaa;
121b8e80941Smrg
122b8e80941Smrg                config.maximum_bpp_of_all_render_targets = job->internal_bpp;
123b8e80941Smrg        }
124b8e80941Smrg#endif /* V3D_VERSION < 40 */
125b8e80941Smrg
126b8e80941Smrg        /* There's definitely nothing in the VCD cache we want. */
127b8e80941Smrg        cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
128b8e80941Smrg
129b8e80941Smrg        /* Disable any leftover OQ state from another job. */
130b8e80941Smrg        cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter);
131b8e80941Smrg
132b8e80941Smrg        /* "Binning mode lists must have a Start Tile Binning item (6) after
133b8e80941Smrg         *  any prefix state data before the binning list proper starts."
134b8e80941Smrg         */
135b8e80941Smrg        cl_emit(&job->bcl, START_TILE_BINNING, bin);
136b8e80941Smrg
137b8e80941Smrg        job->needs_flush = true;
138b8e80941Smrg        job->draw_width = v3d->framebuffer.width;
139b8e80941Smrg        job->draw_height = v3d->framebuffer.height;
140b8e80941Smrg}
141b8e80941Smrg
142b8e80941Smrgstatic void
143b8e80941Smrgv3d_predraw_check_stage_inputs(struct pipe_context *pctx,
144b8e80941Smrg                               enum pipe_shader_type s)
145b8e80941Smrg{
146b8e80941Smrg        struct v3d_context *v3d = v3d_context(pctx);
147b8e80941Smrg
148b8e80941Smrg        /* XXX perf: If we're reading from the output of TF in this job, we
149b8e80941Smrg         * should instead be using the wait for transform feedback
150b8e80941Smrg         * functionality.
151b8e80941Smrg         */
152b8e80941Smrg
153b8e80941Smrg        /* Flush writes to textures we're sampling. */
154b8e80941Smrg        for (int i = 0; i < v3d->tex[s].num_textures; i++) {
155b8e80941Smrg                struct pipe_sampler_view *pview = v3d->tex[s].textures[i];
156b8e80941Smrg                if (!pview)
157b8e80941Smrg                        continue;
158b8e80941Smrg                struct v3d_sampler_view *view = v3d_sampler_view(pview);
159b8e80941Smrg
160b8e80941Smrg                if (view->texture != view->base.texture &&
161b8e80941Smrg                    view->base.format != PIPE_FORMAT_X32_S8X24_UINT)
162b8e80941Smrg                        v3d_update_shadow_texture(pctx, &view->base);
163b8e80941Smrg
164b8e80941Smrg                v3d_flush_jobs_writing_resource(v3d, view->texture);
165b8e80941Smrg        }
166b8e80941Smrg
167b8e80941Smrg        /* Flush writes to UBOs. */
168b8e80941Smrg        foreach_bit(i, v3d->constbuf[s].enabled_mask) {
169b8e80941Smrg                struct pipe_constant_buffer *cb = &v3d->constbuf[s].cb[i];
170b8e80941Smrg                if (cb->buffer)
171b8e80941Smrg                        v3d_flush_jobs_writing_resource(v3d, cb->buffer);
172b8e80941Smrg        }
173b8e80941Smrg
174b8e80941Smrg        /* Flush writes to our image views */
175b8e80941Smrg        foreach_bit(i, v3d->shaderimg[s].enabled_mask) {
176b8e80941Smrg                struct v3d_image_view *view = &v3d->shaderimg[s].si[i];
177b8e80941Smrg
178b8e80941Smrg                v3d_flush_jobs_writing_resource(v3d, view->base.resource);
179b8e80941Smrg        }
180b8e80941Smrg}
181b8e80941Smrg
182b8e80941Smrgstatic void
183b8e80941Smrgv3d_emit_gl_shader_state(struct v3d_context *v3d,
184b8e80941Smrg                         const struct pipe_draw_info *info)
185b8e80941Smrg{
186b8e80941Smrg        struct v3d_job *job = v3d->job;
187b8e80941Smrg        /* VC5_DIRTY_VTXSTATE */
188b8e80941Smrg        struct v3d_vertex_stateobj *vtx = v3d->vtx;
189b8e80941Smrg        /* VC5_DIRTY_VTXBUF */
190b8e80941Smrg        struct v3d_vertexbuf_stateobj *vertexbuf = &v3d->vertexbuf;
191b8e80941Smrg
192b8e80941Smrg        /* Upload the uniforms to the indirect CL first */
193b8e80941Smrg        struct v3d_cl_reloc fs_uniforms =
194b8e80941Smrg                v3d_write_uniforms(v3d, v3d->prog.fs,
195b8e80941Smrg                                   PIPE_SHADER_FRAGMENT);
196b8e80941Smrg        struct v3d_cl_reloc vs_uniforms =
197b8e80941Smrg                v3d_write_uniforms(v3d, v3d->prog.vs,
198b8e80941Smrg                                   PIPE_SHADER_VERTEX);
199b8e80941Smrg        struct v3d_cl_reloc cs_uniforms =
200b8e80941Smrg                v3d_write_uniforms(v3d, v3d->prog.cs,
201b8e80941Smrg                                   PIPE_SHADER_VERTEX);
202b8e80941Smrg
203b8e80941Smrg        /* See GFXH-930 workaround below */
204b8e80941Smrg        uint32_t num_elements_to_emit = MAX2(vtx->num_elements, 1);
205b8e80941Smrg        uint32_t shader_rec_offset =
206b8e80941Smrg                v3d_cl_ensure_space(&job->indirect,
207b8e80941Smrg                                    cl_packet_length(GL_SHADER_STATE_RECORD) +
208b8e80941Smrg                                    num_elements_to_emit *
209b8e80941Smrg                                    cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
210b8e80941Smrg                                    32);
211b8e80941Smrg
212b8e80941Smrg        /* XXX perf: We should move most of the SHADER_STATE_RECORD setup to
213b8e80941Smrg         * compile time, so that we mostly just have to OR the VS and FS
214b8e80941Smrg         * records together at draw time.
215b8e80941Smrg         */
216b8e80941Smrg        cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
217b8e80941Smrg                shader.enable_clipping = true;
218b8e80941Smrg                /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */
219b8e80941Smrg                shader.point_size_in_shaded_vertex_data =
220b8e80941Smrg                        (info->mode == PIPE_PRIM_POINTS &&
221b8e80941Smrg                         v3d->rasterizer->base.point_size_per_vertex);
222b8e80941Smrg
223b8e80941Smrg                /* Must be set if the shader modifies Z, discards, or modifies
224b8e80941Smrg                 * the sample mask.  For any of these cases, the fragment
225b8e80941Smrg                 * shader needs to write the Z value (even just discards).
226b8e80941Smrg                 */
227b8e80941Smrg                shader.fragment_shader_does_z_writes =
228b8e80941Smrg                        v3d->prog.fs->prog_data.fs->writes_z;
229b8e80941Smrg                /* Set if the EZ test must be disabled (due to shader side
230b8e80941Smrg                 * effects and the early_z flag not being present in the
231b8e80941Smrg                 * shader).
232b8e80941Smrg                 */
233b8e80941Smrg                shader.turn_off_early_z_test =
234b8e80941Smrg                        v3d->prog.fs->prog_data.fs->disable_ez;
235b8e80941Smrg
236b8e80941Smrg                shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
237b8e80941Smrg                        v3d->prog.fs->prog_data.fs->uses_center_w;
238b8e80941Smrg
239b8e80941Smrg                shader.number_of_varyings_in_fragment_shader =
240b8e80941Smrg                        v3d->prog.fs->prog_data.fs->num_inputs;
241b8e80941Smrg
242b8e80941Smrg                shader.coordinate_shader_propagate_nans = true;
243b8e80941Smrg                shader.vertex_shader_propagate_nans = true;
244b8e80941Smrg                shader.fragment_shader_propagate_nans = true;
245b8e80941Smrg
246b8e80941Smrg                shader.coordinate_shader_code_address =
247b8e80941Smrg                        cl_address(v3d_resource(v3d->prog.cs->resource)->bo,
248b8e80941Smrg                                   v3d->prog.cs->offset);
249b8e80941Smrg                shader.vertex_shader_code_address =
250b8e80941Smrg                        cl_address(v3d_resource(v3d->prog.vs->resource)->bo,
251b8e80941Smrg                                   v3d->prog.vs->offset);
252b8e80941Smrg                shader.fragment_shader_code_address =
253b8e80941Smrg                        cl_address(v3d_resource(v3d->prog.fs->resource)->bo,
254b8e80941Smrg                                   v3d->prog.fs->offset);
255b8e80941Smrg
256b8e80941Smrg                /* XXX: Use combined input/output size flag in the common
257b8e80941Smrg                 * case.
258b8e80941Smrg                 */
259b8e80941Smrg                shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
260b8e80941Smrg                        v3d->prog.cs->prog_data.vs->separate_segments;
261b8e80941Smrg                shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
262b8e80941Smrg                        v3d->prog.vs->prog_data.vs->separate_segments;
263b8e80941Smrg
264b8e80941Smrg                shader.coordinate_shader_input_vpm_segment_size =
265b8e80941Smrg                        v3d->prog.cs->prog_data.vs->separate_segments ?
266b8e80941Smrg                        v3d->prog.cs->prog_data.vs->vpm_input_size : 1;
267b8e80941Smrg                shader.vertex_shader_input_vpm_segment_size =
268b8e80941Smrg                        v3d->prog.vs->prog_data.vs->separate_segments ?
269b8e80941Smrg                        v3d->prog.vs->prog_data.vs->vpm_input_size : 1;
270b8e80941Smrg
271b8e80941Smrg                shader.coordinate_shader_output_vpm_segment_size =
272b8e80941Smrg                        v3d->prog.cs->prog_data.vs->vpm_output_size;
273b8e80941Smrg                shader.vertex_shader_output_vpm_segment_size =
274b8e80941Smrg                        v3d->prog.vs->prog_data.vs->vpm_output_size;
275b8e80941Smrg
276b8e80941Smrg                shader.coordinate_shader_uniforms_address = cs_uniforms;
277b8e80941Smrg                shader.vertex_shader_uniforms_address = vs_uniforms;
278b8e80941Smrg                shader.fragment_shader_uniforms_address = fs_uniforms;
279b8e80941Smrg
280b8e80941Smrg#if V3D_VERSION >= 41
281b8e80941Smrg                shader.min_coord_shader_input_segments_required_in_play = 1;
282b8e80941Smrg                shader.min_vertex_shader_input_segments_required_in_play = 1;
283b8e80941Smrg
284b8e80941Smrg                shader.coordinate_shader_4_way_threadable =
285b8e80941Smrg                        v3d->prog.cs->prog_data.vs->base.threads == 4;
286b8e80941Smrg                shader.vertex_shader_4_way_threadable =
287b8e80941Smrg                        v3d->prog.vs->prog_data.vs->base.threads == 4;
288b8e80941Smrg                shader.fragment_shader_4_way_threadable =
289b8e80941Smrg                        v3d->prog.fs->prog_data.fs->base.threads == 4;
290b8e80941Smrg
291b8e80941Smrg                shader.coordinate_shader_start_in_final_thread_section =
292b8e80941Smrg                        v3d->prog.cs->prog_data.vs->base.single_seg;
293b8e80941Smrg                shader.vertex_shader_start_in_final_thread_section =
294b8e80941Smrg                        v3d->prog.vs->prog_data.vs->base.single_seg;
295b8e80941Smrg                shader.fragment_shader_start_in_final_thread_section =
296b8e80941Smrg                        v3d->prog.fs->prog_data.fs->base.single_seg;
297b8e80941Smrg#else
298b8e80941Smrg                shader.coordinate_shader_4_way_threadable =
299b8e80941Smrg                        v3d->prog.cs->prog_data.vs->base.threads == 4;
300b8e80941Smrg                shader.coordinate_shader_2_way_threadable =
301b8e80941Smrg                        v3d->prog.cs->prog_data.vs->base.threads == 2;
302b8e80941Smrg                shader.vertex_shader_4_way_threadable =
303b8e80941Smrg                        v3d->prog.vs->prog_data.vs->base.threads == 4;
304b8e80941Smrg                shader.vertex_shader_2_way_threadable =
305b8e80941Smrg                        v3d->prog.vs->prog_data.vs->base.threads == 2;
306b8e80941Smrg                shader.fragment_shader_4_way_threadable =
307b8e80941Smrg                        v3d->prog.fs->prog_data.fs->base.threads == 4;
308b8e80941Smrg                shader.fragment_shader_2_way_threadable =
309b8e80941Smrg                        v3d->prog.fs->prog_data.fs->base.threads == 2;
310b8e80941Smrg#endif
311b8e80941Smrg
312b8e80941Smrg                shader.vertex_id_read_by_coordinate_shader =
313b8e80941Smrg                        v3d->prog.cs->prog_data.vs->uses_vid;
314b8e80941Smrg                shader.instance_id_read_by_coordinate_shader =
315b8e80941Smrg                        v3d->prog.cs->prog_data.vs->uses_iid;
316b8e80941Smrg                shader.vertex_id_read_by_vertex_shader =
317b8e80941Smrg                        v3d->prog.vs->prog_data.vs->uses_vid;
318b8e80941Smrg                shader.instance_id_read_by_vertex_shader =
319b8e80941Smrg                        v3d->prog.vs->prog_data.vs->uses_iid;
320b8e80941Smrg
321b8e80941Smrg                shader.address_of_default_attribute_values =
322b8e80941Smrg                        cl_address(v3d_resource(vtx->defaults)->bo,
323b8e80941Smrg                                   vtx->defaults_offset);
324b8e80941Smrg        }
325b8e80941Smrg
326b8e80941Smrg        bool cs_loaded_any = false;
327b8e80941Smrg        for (int i = 0; i < vtx->num_elements; i++) {
328b8e80941Smrg                struct pipe_vertex_element *elem = &vtx->pipe[i];
329b8e80941Smrg                struct pipe_vertex_buffer *vb =
330b8e80941Smrg                        &vertexbuf->vb[elem->vertex_buffer_index];
331b8e80941Smrg                struct v3d_resource *rsc = v3d_resource(vb->buffer.resource);
332b8e80941Smrg
333b8e80941Smrg                const uint32_t size =
334b8e80941Smrg                        cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
335b8e80941Smrg                cl_emit_with_prepacked(&job->indirect,
336b8e80941Smrg                                       GL_SHADER_STATE_ATTRIBUTE_RECORD,
337b8e80941Smrg                                       &vtx->attrs[i * size], attr) {
338b8e80941Smrg                        attr.stride = vb->stride;
339b8e80941Smrg                        attr.address = cl_address(rsc->bo,
340b8e80941Smrg                                                  vb->buffer_offset +
341b8e80941Smrg                                                  elem->src_offset);
342b8e80941Smrg                        attr.number_of_values_read_by_coordinate_shader =
343b8e80941Smrg                                v3d->prog.cs->prog_data.vs->vattr_sizes[i];
344b8e80941Smrg                        attr.number_of_values_read_by_vertex_shader =
345b8e80941Smrg                                v3d->prog.vs->prog_data.vs->vattr_sizes[i];
346b8e80941Smrg
347b8e80941Smrg                        /* GFXH-930: At least one attribute must be enabled
348b8e80941Smrg                         * and read by CS and VS.  If we have attributes being
349b8e80941Smrg                         * consumed by the VS but not the CS, then set up a
350b8e80941Smrg                         * dummy load of the last attribute into the CS's VPM
351b8e80941Smrg                         * inputs.  (Since CS is just dead-code-elimination
352b8e80941Smrg                         * compared to VS, we can't have CS loading but not
353b8e80941Smrg                         * VS).
354b8e80941Smrg                         */
355b8e80941Smrg                        if (v3d->prog.cs->prog_data.vs->vattr_sizes[i])
356b8e80941Smrg                                cs_loaded_any = true;
357b8e80941Smrg                        if (i == vtx->num_elements - 1 && !cs_loaded_any) {
358b8e80941Smrg                                attr.number_of_values_read_by_coordinate_shader = 1;
359b8e80941Smrg                        }
360b8e80941Smrg#if V3D_VERSION >= 41
361b8e80941Smrg                        attr.maximum_index = 0xffffff;
362b8e80941Smrg#endif
363b8e80941Smrg                }
364b8e80941Smrg                STATIC_ASSERT(sizeof(vtx->attrs) >= V3D_MAX_VS_INPUTS / 4 * size);
365b8e80941Smrg        }
366b8e80941Smrg
367b8e80941Smrg        if (vtx->num_elements == 0) {
368b8e80941Smrg                /* GFXH-930: At least one attribute must be enabled and read
369b8e80941Smrg                 * by CS and VS.  If we have no attributes being consumed by
370b8e80941Smrg                 * the shader, set up a dummy to be loaded into the VPM.
371b8e80941Smrg                 */
372b8e80941Smrg                cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
373b8e80941Smrg                        /* Valid address of data whose value will be unused. */
374b8e80941Smrg                        attr.address = cl_address(job->indirect.bo, 0);
375b8e80941Smrg
376b8e80941Smrg                        attr.type = ATTRIBUTE_FLOAT;
377b8e80941Smrg                        attr.stride = 0;
378b8e80941Smrg                        attr.vec_size = 1;
379b8e80941Smrg
380b8e80941Smrg                        attr.number_of_values_read_by_coordinate_shader = 1;
381b8e80941Smrg                        attr.number_of_values_read_by_vertex_shader = 1;
382b8e80941Smrg                }
383b8e80941Smrg        }
384b8e80941Smrg
385b8e80941Smrg        cl_emit(&job->bcl, VCM_CACHE_SIZE, vcm) {
386b8e80941Smrg                vcm.number_of_16_vertex_batches_for_binning =
387b8e80941Smrg                        v3d->prog.cs->prog_data.vs->vcm_cache_size;
388b8e80941Smrg                vcm.number_of_16_vertex_batches_for_rendering =
389b8e80941Smrg                        v3d->prog.vs->prog_data.vs->vcm_cache_size;
390b8e80941Smrg        }
391b8e80941Smrg
392b8e80941Smrg        cl_emit(&job->bcl, GL_SHADER_STATE, state) {
393b8e80941Smrg                state.address = cl_address(job->indirect.bo, shader_rec_offset);
394b8e80941Smrg                state.number_of_attribute_arrays = num_elements_to_emit;
395b8e80941Smrg        }
396b8e80941Smrg
397b8e80941Smrg        v3d_bo_unreference(&cs_uniforms.bo);
398b8e80941Smrg        v3d_bo_unreference(&vs_uniforms.bo);
399b8e80941Smrg        v3d_bo_unreference(&fs_uniforms.bo);
400b8e80941Smrg
401b8e80941Smrg        job->shader_rec_count++;
402b8e80941Smrg}
403b8e80941Smrg
404b8e80941Smrg/**
405b8e80941Smrg * Computes the various transform feedback statistics, since they can't be
406b8e80941Smrg * recorded by CL packets.
407b8e80941Smrg */
408b8e80941Smrgstatic void
409b8e80941Smrgv3d_tf_statistics_record(struct v3d_context *v3d,
410b8e80941Smrg                         const struct pipe_draw_info *info,
411b8e80941Smrg                         bool prim_tf)
412b8e80941Smrg{
413b8e80941Smrg        if (!v3d->active_queries)
414b8e80941Smrg                return;
415b8e80941Smrg
416b8e80941Smrg        uint32_t prims = u_prims_for_vertices(info->mode, info->count);
417b8e80941Smrg        v3d->prims_generated += prims;
418b8e80941Smrg
419b8e80941Smrg        if (prim_tf) {
420b8e80941Smrg                /* XXX: Only count if we didn't overflow. */
421b8e80941Smrg                v3d->tf_prims_generated += prims;
422b8e80941Smrg        }
423b8e80941Smrg}
424b8e80941Smrg
425b8e80941Smrgstatic void
426b8e80941Smrgv3d_update_job_ez(struct v3d_context *v3d, struct v3d_job *job)
427b8e80941Smrg{
428b8e80941Smrg        switch (v3d->zsa->ez_state) {
429b8e80941Smrg        case VC5_EZ_UNDECIDED:
430b8e80941Smrg                /* If the Z/S state didn't pick a direction but didn't
431b8e80941Smrg                 * disable, then go along with the current EZ state.  This
432b8e80941Smrg                 * allows EZ optimization for Z func == EQUAL or NEVER.
433b8e80941Smrg                 */
434b8e80941Smrg                break;
435b8e80941Smrg
436b8e80941Smrg        case VC5_EZ_LT_LE:
437b8e80941Smrg        case VC5_EZ_GT_GE:
438b8e80941Smrg                /* If the Z/S state picked a direction, then it needs to match
439b8e80941Smrg                 * the current direction if we've decided on one.
440b8e80941Smrg                 */
441b8e80941Smrg                if (job->ez_state == VC5_EZ_UNDECIDED)
442b8e80941Smrg                        job->ez_state = v3d->zsa->ez_state;
443b8e80941Smrg                else if (job->ez_state != v3d->zsa->ez_state)
444b8e80941Smrg                        job->ez_state = VC5_EZ_DISABLED;
445b8e80941Smrg                break;
446b8e80941Smrg
447b8e80941Smrg        case VC5_EZ_DISABLED:
448b8e80941Smrg                /* If the current Z/S state disables EZ because of a bad Z
449b8e80941Smrg                 * func or stencil operation, then we can't do any more EZ in
450b8e80941Smrg                 * this frame.
451b8e80941Smrg                 */
452b8e80941Smrg                job->ez_state = VC5_EZ_DISABLED;
453b8e80941Smrg                break;
454b8e80941Smrg        }
455b8e80941Smrg
456b8e80941Smrg        /* If the FS affects the Z of the pixels, then it may update against
457b8e80941Smrg         * the chosen EZ direction (though we could use
458b8e80941Smrg         * ARB_conservative_depth's hints to avoid this)
459b8e80941Smrg         */
460b8e80941Smrg        if (v3d->prog.fs->prog_data.fs->writes_z) {
461b8e80941Smrg                job->ez_state = VC5_EZ_DISABLED;
462b8e80941Smrg        }
463b8e80941Smrg
464b8e80941Smrg        if (job->first_ez_state == VC5_EZ_UNDECIDED &&
465b8e80941Smrg            (job->ez_state != VC5_EZ_DISABLED || job->draw_calls_queued == 0))
466b8e80941Smrg                job->first_ez_state = job->ez_state;
467b8e80941Smrg}
468b8e80941Smrg
469b8e80941Smrgstatic void
470b8e80941Smrgv3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
471b8e80941Smrg{
472b8e80941Smrg        struct v3d_context *v3d = v3d_context(pctx);
473b8e80941Smrg
474b8e80941Smrg        if (!info->count_from_stream_output && !info->indirect &&
475b8e80941Smrg            !info->primitive_restart &&
476b8e80941Smrg            !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
477b8e80941Smrg                return;
478b8e80941Smrg
479b8e80941Smrg        /* Fall back for weird desktop GL primitive restart values. */
480b8e80941Smrg        if (info->primitive_restart &&
481b8e80941Smrg            info->index_size) {
482b8e80941Smrg                uint32_t mask = ~0;
483b8e80941Smrg
484b8e80941Smrg                switch (info->index_size) {
485b8e80941Smrg                case 2:
486b8e80941Smrg                        mask = 0xffff;
487b8e80941Smrg                        break;
488b8e80941Smrg                case 1:
489b8e80941Smrg                        mask = 0xff;
490b8e80941Smrg                        break;
491b8e80941Smrg                }
492b8e80941Smrg
493b8e80941Smrg                if (info->restart_index != mask) {
494b8e80941Smrg                        util_draw_vbo_without_prim_restart(pctx, info);
495b8e80941Smrg                        return;
496b8e80941Smrg                }
497b8e80941Smrg        }
498b8e80941Smrg
499b8e80941Smrg        if (info->mode >= PIPE_PRIM_QUADS) {
500b8e80941Smrg                util_primconvert_save_rasterizer_state(v3d->primconvert, &v3d->rasterizer->base);
501b8e80941Smrg                util_primconvert_draw_vbo(v3d->primconvert, info);
502b8e80941Smrg                perf_debug("Fallback conversion for %d %s vertices\n",
503b8e80941Smrg                           info->count, u_prim_name(info->mode));
504b8e80941Smrg                return;
505b8e80941Smrg        }
506b8e80941Smrg
507b8e80941Smrg        /* Before setting up the draw, flush anything writing to the textures
508b8e80941Smrg         * that we read from.
509b8e80941Smrg         */
510b8e80941Smrg        for (int s = 0; s < PIPE_SHADER_COMPUTE; s++)
511b8e80941Smrg                v3d_predraw_check_stage_inputs(pctx, s);
512b8e80941Smrg
513b8e80941Smrg        if (info->indirect)
514b8e80941Smrg                v3d_flush_jobs_writing_resource(v3d, info->indirect->buffer);
515b8e80941Smrg
516b8e80941Smrg        struct v3d_job *job = v3d_get_job_for_fbo(v3d);
517b8e80941Smrg
518b8e80941Smrg        /* If vertex texturing depends on the output of rendering, we need to
519b8e80941Smrg         * ensure that that rendering is complete before we run a coordinate
520b8e80941Smrg         * shader that depends on it.
521b8e80941Smrg         *
522b8e80941Smrg         * Given that doing that is unusual, for now we just block the binner
523b8e80941Smrg         * on the last submitted render, rather than tracking the last
524b8e80941Smrg         * rendering to each texture's BO.
525b8e80941Smrg         */
526b8e80941Smrg        if (v3d->tex[PIPE_SHADER_VERTEX].num_textures || info->indirect) {
527b8e80941Smrg                perf_debug("Blocking binner on last render "
528b8e80941Smrg                           "due to vertex texturing or indirect drawing.\n");
529b8e80941Smrg                job->submit.in_sync_bcl = v3d->out_sync;
530b8e80941Smrg        }
531b8e80941Smrg
532b8e80941Smrg        /* Mark SSBOs as being written.  We don't actually know which ones are
533b8e80941Smrg         * read vs written, so just assume the worst
534b8e80941Smrg         */
535b8e80941Smrg        for (int s = 0; s < PIPE_SHADER_COMPUTE; s++) {
536b8e80941Smrg                foreach_bit(i, v3d->ssbo[s].enabled_mask) {
537b8e80941Smrg                        v3d_job_add_write_resource(job,
538b8e80941Smrg                                                   v3d->ssbo[s].sb[i].buffer);
539b8e80941Smrg                        job->tmu_dirty_rcl = true;
540b8e80941Smrg                }
541b8e80941Smrg
542b8e80941Smrg                foreach_bit(i, v3d->shaderimg[s].enabled_mask) {
543b8e80941Smrg                        v3d_job_add_write_resource(job,
544b8e80941Smrg                                                   v3d->shaderimg[s].si[i].base.resource);
545b8e80941Smrg                        job->tmu_dirty_rcl = true;
546b8e80941Smrg                }
547b8e80941Smrg        }
548b8e80941Smrg
549b8e80941Smrg        /* Get space to emit our draw call into the BCL, using a branch to
550b8e80941Smrg         * jump to a new BO if necessary.
551b8e80941Smrg         */
552b8e80941Smrg        v3d_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
553b8e80941Smrg
554b8e80941Smrg        if (v3d->prim_mode != info->mode) {
555b8e80941Smrg                v3d->prim_mode = info->mode;
556b8e80941Smrg                v3d->dirty |= VC5_DIRTY_PRIM_MODE;
557b8e80941Smrg        }
558b8e80941Smrg
559b8e80941Smrg        v3d_start_draw(v3d);
560b8e80941Smrg        v3d_update_compiled_shaders(v3d, info->mode);
561b8e80941Smrg        v3d_update_job_ez(v3d, job);
562b8e80941Smrg
563b8e80941Smrg#if V3D_VERSION >= 41
564b8e80941Smrg        v3d41_emit_state(pctx);
565b8e80941Smrg#else
566b8e80941Smrg        v3d33_emit_state(pctx);
567b8e80941Smrg#endif
568b8e80941Smrg
569b8e80941Smrg        if (v3d->dirty & (VC5_DIRTY_VTXBUF |
570b8e80941Smrg                          VC5_DIRTY_VTXSTATE |
571b8e80941Smrg                          VC5_DIRTY_PRIM_MODE |
572b8e80941Smrg                          VC5_DIRTY_RASTERIZER |
573b8e80941Smrg                          VC5_DIRTY_COMPILED_CS |
574b8e80941Smrg                          VC5_DIRTY_COMPILED_VS |
575b8e80941Smrg                          VC5_DIRTY_COMPILED_FS |
576b8e80941Smrg                          v3d->prog.cs->uniform_dirty_bits |
577b8e80941Smrg                          v3d->prog.vs->uniform_dirty_bits |
578b8e80941Smrg                          v3d->prog.fs->uniform_dirty_bits)) {
579b8e80941Smrg                v3d_emit_gl_shader_state(v3d, info);
580b8e80941Smrg        }
581b8e80941Smrg
582b8e80941Smrg        v3d->dirty = 0;
583b8e80941Smrg
584b8e80941Smrg        /* The Base Vertex/Base Instance packet sets those values to nonzero
585b8e80941Smrg         * for the next draw call only.
586b8e80941Smrg         */
587b8e80941Smrg        if (info->index_bias || info->start_instance) {
588b8e80941Smrg                cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
589b8e80941Smrg                        base.base_instance = info->start_instance;
590b8e80941Smrg                        base.base_vertex = info->index_bias;
591b8e80941Smrg                }
592b8e80941Smrg        }
593b8e80941Smrg
594b8e80941Smrg        uint32_t prim_tf_enable = 0;
595b8e80941Smrg#if V3D_VERSION < 40
596b8e80941Smrg        /* V3D 3.x: The HW only processes transform feedback on primitives
597b8e80941Smrg         * with the flag set.
598b8e80941Smrg         */
599b8e80941Smrg        if (v3d->streamout.num_targets)
600b8e80941Smrg                prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS);
601b8e80941Smrg#endif
602b8e80941Smrg
603b8e80941Smrg        v3d_tf_statistics_record(v3d, info, v3d->streamout.num_targets);
604b8e80941Smrg
605b8e80941Smrg        /* Note that the primitive type fields match with OpenGL/gallium
606b8e80941Smrg         * definitions, up to but not including QUADS.
607b8e80941Smrg         */
608b8e80941Smrg        if (info->index_size) {
609b8e80941Smrg                uint32_t index_size = info->index_size;
610b8e80941Smrg                uint32_t offset = info->start * index_size;
611b8e80941Smrg                struct pipe_resource *prsc;
612b8e80941Smrg                if (info->has_user_indices) {
613b8e80941Smrg                        prsc = NULL;
614b8e80941Smrg                        u_upload_data(v3d->uploader, 0,
615b8e80941Smrg                                      info->count * info->index_size, 4,
616b8e80941Smrg                                      info->index.user,
617b8e80941Smrg                                      &offset, &prsc);
618b8e80941Smrg                } else {
619b8e80941Smrg                        prsc = info->index.resource;
620b8e80941Smrg                }
621b8e80941Smrg                struct v3d_resource *rsc = v3d_resource(prsc);
622b8e80941Smrg
623b8e80941Smrg#if V3D_VERSION >= 40
624b8e80941Smrg                cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) {
625b8e80941Smrg                        ib.address = cl_address(rsc->bo, 0);
626b8e80941Smrg                        ib.size = rsc->bo->size;
627b8e80941Smrg                }
628b8e80941Smrg#endif
629b8e80941Smrg
630b8e80941Smrg                if (info->indirect) {
631b8e80941Smrg                        cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) {
632b8e80941Smrg                                prim.index_type = ffs(info->index_size) - 1;
633b8e80941Smrg#if V3D_VERSION < 40
634b8e80941Smrg                                prim.address_of_indices_list =
635b8e80941Smrg                                        cl_address(rsc->bo, offset);
636b8e80941Smrg#endif /* V3D_VERSION < 40 */
637b8e80941Smrg                                prim.mode = info->mode | prim_tf_enable;
638b8e80941Smrg                                prim.enable_primitive_restarts = info->primitive_restart;
639b8e80941Smrg
640b8e80941Smrg                                prim.number_of_draw_indirect_indexed_records = info->indirect->draw_count;
641b8e80941Smrg
642b8e80941Smrg                                prim.stride_in_multiples_of_4_bytes = info->indirect->stride >> 2;
643b8e80941Smrg                                prim.address = cl_address(v3d_resource(info->indirect->buffer)->bo,
644b8e80941Smrg                                                          info->indirect->offset);
645b8e80941Smrg                        }
646b8e80941Smrg                } else if (info->instance_count > 1) {
647b8e80941Smrg                        cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) {
648b8e80941Smrg                                prim.index_type = ffs(info->index_size) - 1;
649b8e80941Smrg#if V3D_VERSION >= 40
650b8e80941Smrg                                prim.index_offset = offset;
651b8e80941Smrg#else /* V3D_VERSION < 40 */
652b8e80941Smrg                                prim.maximum_index = (1u << 31) - 1; /* XXX */
653b8e80941Smrg                                prim.address_of_indices_list =
654b8e80941Smrg                                        cl_address(rsc->bo, offset);
655b8e80941Smrg#endif /* V3D_VERSION < 40 */
656b8e80941Smrg                                prim.mode = info->mode | prim_tf_enable;
657b8e80941Smrg                                prim.enable_primitive_restarts = info->primitive_restart;
658b8e80941Smrg
659b8e80941Smrg                                prim.number_of_instances = info->instance_count;
660b8e80941Smrg                                prim.instance_length = info->count;
661b8e80941Smrg                        }
662b8e80941Smrg                } else {
663b8e80941Smrg                        cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) {
664b8e80941Smrg                                prim.index_type = ffs(info->index_size) - 1;
665b8e80941Smrg                                prim.length = info->count;
666b8e80941Smrg#if V3D_VERSION >= 40
667b8e80941Smrg                                prim.index_offset = offset;
668b8e80941Smrg#else /* V3D_VERSION < 40 */
669b8e80941Smrg                                prim.maximum_index = (1u << 31) - 1; /* XXX */
670b8e80941Smrg                                prim.address_of_indices_list =
671b8e80941Smrg                                        cl_address(rsc->bo, offset);
672b8e80941Smrg#endif /* V3D_VERSION < 40 */
673b8e80941Smrg                                prim.mode = info->mode | prim_tf_enable;
674b8e80941Smrg                                prim.enable_primitive_restarts = info->primitive_restart;
675b8e80941Smrg                        }
676b8e80941Smrg                }
677b8e80941Smrg
678b8e80941Smrg                job->draw_calls_queued++;
679b8e80941Smrg
680b8e80941Smrg                if (info->has_user_indices)
681b8e80941Smrg                        pipe_resource_reference(&prsc, NULL);
682b8e80941Smrg        } else {
683b8e80941Smrg                if (info->indirect) {
684b8e80941Smrg                        cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
685b8e80941Smrg                                prim.mode = info->mode | prim_tf_enable;
686b8e80941Smrg                                prim.number_of_draw_indirect_array_records = info->indirect->draw_count;
687b8e80941Smrg
688b8e80941Smrg                                prim.stride_in_multiples_of_4_bytes = info->indirect->stride >> 2;
689b8e80941Smrg                                prim.address = cl_address(v3d_resource(info->indirect->buffer)->bo,
690b8e80941Smrg                                                          info->indirect->offset);
691b8e80941Smrg                        }
692b8e80941Smrg                } else if (info->instance_count > 1) {
693b8e80941Smrg                        cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
694b8e80941Smrg                                prim.mode = info->mode | prim_tf_enable;
695b8e80941Smrg                                prim.index_of_first_vertex = info->start;
696b8e80941Smrg                                prim.number_of_instances = info->instance_count;
697b8e80941Smrg                                prim.instance_length = info->count;
698b8e80941Smrg                        }
699b8e80941Smrg                } else {
700b8e80941Smrg                        cl_emit(&job->bcl, VERTEX_ARRAY_PRIMS, prim) {
701b8e80941Smrg                                prim.mode = info->mode | prim_tf_enable;
702b8e80941Smrg                                prim.length = info->count;
703b8e80941Smrg                                prim.index_of_first_vertex = info->start;
704b8e80941Smrg                        }
705b8e80941Smrg                }
706b8e80941Smrg        }
707b8e80941Smrg
708b8e80941Smrg        /* A flush is required in between a TF draw and any following TF specs
709b8e80941Smrg         * packet, or the GPU may hang.  Just flush each time for now.
710b8e80941Smrg         */
711b8e80941Smrg        if (v3d->streamout.num_targets)
712b8e80941Smrg                cl_emit(&job->bcl, TRANSFORM_FEEDBACK_FLUSH_AND_COUNT, flush);
713b8e80941Smrg
714b8e80941Smrg        job->draw_calls_queued++;
715b8e80941Smrg
716b8e80941Smrg        /* Increment the TF offsets by how many verts we wrote.  XXX: This
717b8e80941Smrg         * needs some clamping to the buffer size.
718b8e80941Smrg         */
719b8e80941Smrg        for (int i = 0; i < v3d->streamout.num_targets; i++)
720b8e80941Smrg                v3d->streamout.offsets[i] += info->count;
721b8e80941Smrg
722b8e80941Smrg        if (v3d->zsa && job->zsbuf && v3d->zsa->base.depth.enabled) {
723b8e80941Smrg                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
724b8e80941Smrg                v3d_job_add_bo(job, rsc->bo);
725b8e80941Smrg
726b8e80941Smrg                job->load |= PIPE_CLEAR_DEPTH & ~job->clear;
727b8e80941Smrg                if (v3d->zsa->base.depth.writemask)
728b8e80941Smrg                        job->store |= PIPE_CLEAR_DEPTH;
729b8e80941Smrg                rsc->initialized_buffers = PIPE_CLEAR_DEPTH;
730b8e80941Smrg        }
731b8e80941Smrg
732b8e80941Smrg        if (v3d->zsa && job->zsbuf && v3d->zsa->base.stencil[0].enabled) {
733b8e80941Smrg                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
734b8e80941Smrg                if (rsc->separate_stencil)
735b8e80941Smrg                        rsc = rsc->separate_stencil;
736b8e80941Smrg
737b8e80941Smrg                v3d_job_add_bo(job, rsc->bo);
738b8e80941Smrg
739b8e80941Smrg                job->load |= PIPE_CLEAR_STENCIL & ~job->clear;
740b8e80941Smrg                if (v3d->zsa->base.stencil[0].writemask ||
741b8e80941Smrg                    v3d->zsa->base.stencil[1].writemask) {
742b8e80941Smrg                        job->store |= PIPE_CLEAR_STENCIL;
743b8e80941Smrg                }
744b8e80941Smrg                rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
745b8e80941Smrg        }
746b8e80941Smrg
747b8e80941Smrg        for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
748b8e80941Smrg                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
749b8e80941Smrg                int blend_rt = v3d->blend->base.independent_blend_enable ? i : 0;
750b8e80941Smrg
751b8e80941Smrg                if (job->store & bit || !job->cbufs[i])
752b8e80941Smrg                        continue;
753b8e80941Smrg                struct v3d_resource *rsc = v3d_resource(job->cbufs[i]->texture);
754b8e80941Smrg
755b8e80941Smrg                job->load |= bit & ~job->clear;
756b8e80941Smrg                if (v3d->blend->base.rt[blend_rt].colormask)
757b8e80941Smrg                        job->store |= bit;
758b8e80941Smrg                v3d_job_add_bo(job, rsc->bo);
759b8e80941Smrg        }
760b8e80941Smrg
761b8e80941Smrg        if (job->referenced_size > 768 * 1024 * 1024) {
762b8e80941Smrg                perf_debug("Flushing job with %dkb to try to free up memory\n",
763b8e80941Smrg                        job->referenced_size / 1024);
764b8e80941Smrg                v3d_flush(pctx);
765b8e80941Smrg        }
766b8e80941Smrg
767b8e80941Smrg        if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)
768b8e80941Smrg                v3d_flush(pctx);
769b8e80941Smrg}
770b8e80941Smrg
771b8e80941Smrg/**
772b8e80941Smrg * Implements gallium's clear() hook (glClear()) by drawing a pair of triangles.
773b8e80941Smrg */
774b8e80941Smrgstatic void
775b8e80941Smrgv3d_draw_clear(struct v3d_context *v3d,
776b8e80941Smrg               unsigned buffers,
777b8e80941Smrg               const union pipe_color_union *color,
778b8e80941Smrg               double depth, unsigned stencil)
779b8e80941Smrg{
780b8e80941Smrg        static const union pipe_color_union dummy_color = {};
781b8e80941Smrg
782b8e80941Smrg        /* The blitter util dereferences the color regardless, even though the
783b8e80941Smrg         * gallium clear API may not pass one in when only Z/S are cleared.
784b8e80941Smrg         */
785b8e80941Smrg        if (!color)
786b8e80941Smrg                color = &dummy_color;
787b8e80941Smrg
788b8e80941Smrg        v3d_blitter_save(v3d);
789b8e80941Smrg        util_blitter_clear(v3d->blitter,
790b8e80941Smrg                           v3d->framebuffer.width,
791b8e80941Smrg                           v3d->framebuffer.height,
792b8e80941Smrg                           util_framebuffer_get_num_layers(&v3d->framebuffer),
793b8e80941Smrg                           buffers, color, depth, stencil);
794b8e80941Smrg}
795b8e80941Smrg
796b8e80941Smrg/**
797b8e80941Smrg * Attempts to perform the GL clear by using the TLB's fast clear at the start
798b8e80941Smrg * of the frame.
799b8e80941Smrg */
800b8e80941Smrgstatic unsigned
801b8e80941Smrgv3d_tlb_clear(struct v3d_job *job, unsigned buffers,
802b8e80941Smrg              const union pipe_color_union *color,
803b8e80941Smrg              double depth, unsigned stencil)
804b8e80941Smrg{
805b8e80941Smrg        struct v3d_context *v3d = job->v3d;
806b8e80941Smrg
807b8e80941Smrg        if (job->draw_calls_queued) {
808b8e80941Smrg                /* If anything in the CL has drawn using the buffer, then the
809b8e80941Smrg                 * TLB clear we're trying to add now would happen before that
810b8e80941Smrg                 * drawing.
811b8e80941Smrg                 */
812b8e80941Smrg                buffers &= ~(job->load | job->store);
813b8e80941Smrg        }
814b8e80941Smrg
815b8e80941Smrg        /* GFXH-1461: If we were to emit a load of just depth or just stencil,
816b8e80941Smrg         * then the clear for the other may get lost.  We need to decide now
817b8e80941Smrg         * if it would be possible to need to emit a load of just one after
818b8e80941Smrg         * we've set up our TLB clears.
819b8e80941Smrg         */
820b8e80941Smrg        if (buffers & PIPE_CLEAR_DEPTHSTENCIL &&
821b8e80941Smrg            (buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL &&
822b8e80941Smrg            job->zsbuf &&
823b8e80941Smrg            util_format_is_depth_and_stencil(job->zsbuf->texture->format)) {
824b8e80941Smrg                buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
825b8e80941Smrg        }
826b8e80941Smrg
827b8e80941Smrg        for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
828b8e80941Smrg                uint32_t bit = PIPE_CLEAR_COLOR0 << i;
829b8e80941Smrg                if (!(buffers & bit))
830b8e80941Smrg                        continue;
831b8e80941Smrg
832b8e80941Smrg                struct pipe_surface *psurf = v3d->framebuffer.cbufs[i];
833b8e80941Smrg                struct v3d_surface *surf = v3d_surface(psurf);
834b8e80941Smrg                struct v3d_resource *rsc = v3d_resource(psurf->texture);
835b8e80941Smrg
836b8e80941Smrg                union util_color uc;
837b8e80941Smrg                uint32_t internal_size = 4 << surf->internal_bpp;
838b8e80941Smrg
839b8e80941Smrg                static union pipe_color_union swapped_color;
840b8e80941Smrg                if (v3d->swap_color_rb & (1 << i)) {
841b8e80941Smrg                        swapped_color.f[0] = color->f[2];
842b8e80941Smrg                        swapped_color.f[1] = color->f[1];
843b8e80941Smrg                        swapped_color.f[2] = color->f[0];
844b8e80941Smrg                        swapped_color.f[3] = color->f[3];
845b8e80941Smrg                        color = &swapped_color;
846b8e80941Smrg                }
847b8e80941Smrg
848b8e80941Smrg                switch (surf->internal_type) {
849b8e80941Smrg                case V3D_INTERNAL_TYPE_8:
850b8e80941Smrg                        util_pack_color(color->f, PIPE_FORMAT_R8G8B8A8_UNORM,
851b8e80941Smrg                                        &uc);
852b8e80941Smrg                        memcpy(job->clear_color[i], uc.ui, internal_size);
853b8e80941Smrg                        break;
854b8e80941Smrg                case V3D_INTERNAL_TYPE_8I:
855b8e80941Smrg                case V3D_INTERNAL_TYPE_8UI:
856b8e80941Smrg                        job->clear_color[i][0] = ((color->ui[0] & 0xff) |
857b8e80941Smrg                                                  (color->ui[1] & 0xff) << 8 |
858b8e80941Smrg                                                  (color->ui[2] & 0xff) << 16 |
859b8e80941Smrg                                                  (color->ui[3] & 0xff) << 24);
860b8e80941Smrg                        break;
861b8e80941Smrg                case V3D_INTERNAL_TYPE_16F:
862b8e80941Smrg                        util_pack_color(color->f, PIPE_FORMAT_R16G16B16A16_FLOAT,
863b8e80941Smrg                                        &uc);
864b8e80941Smrg                        memcpy(job->clear_color[i], uc.ui, internal_size);
865b8e80941Smrg                        break;
866b8e80941Smrg                case V3D_INTERNAL_TYPE_16I:
867b8e80941Smrg                case V3D_INTERNAL_TYPE_16UI:
868b8e80941Smrg                        job->clear_color[i][0] = ((color->ui[0] & 0xffff) |
869b8e80941Smrg                                                  color->ui[1] << 16);
870b8e80941Smrg                        job->clear_color[i][1] = ((color->ui[2] & 0xffff) |
871b8e80941Smrg                                                  color->ui[3] << 16);
872b8e80941Smrg                        break;
873b8e80941Smrg                case V3D_INTERNAL_TYPE_32F:
874b8e80941Smrg                case V3D_INTERNAL_TYPE_32I:
875b8e80941Smrg                case V3D_INTERNAL_TYPE_32UI:
876b8e80941Smrg                        memcpy(job->clear_color[i], color->ui, internal_size);
877b8e80941Smrg                        break;
878b8e80941Smrg                }
879b8e80941Smrg
880b8e80941Smrg                rsc->initialized_buffers |= bit;
881b8e80941Smrg        }
882b8e80941Smrg
883b8e80941Smrg        unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL;
884b8e80941Smrg        if (zsclear) {
885b8e80941Smrg                struct v3d_resource *rsc =
886b8e80941Smrg                        v3d_resource(v3d->framebuffer.zsbuf->texture);
887b8e80941Smrg
888b8e80941Smrg                if (zsclear & PIPE_CLEAR_DEPTH)
889b8e80941Smrg                        job->clear_z = depth;
890b8e80941Smrg                if (zsclear & PIPE_CLEAR_STENCIL)
891b8e80941Smrg                        job->clear_s = stencil;
892b8e80941Smrg
893b8e80941Smrg                rsc->initialized_buffers |= zsclear;
894b8e80941Smrg        }
895b8e80941Smrg
896b8e80941Smrg        job->draw_min_x = 0;
897b8e80941Smrg        job->draw_min_y = 0;
898b8e80941Smrg        job->draw_max_x = v3d->framebuffer.width;
899b8e80941Smrg        job->draw_max_y = v3d->framebuffer.height;
900b8e80941Smrg        job->clear |= buffers;
901b8e80941Smrg        job->store |= buffers;
902b8e80941Smrg
903b8e80941Smrg        v3d_start_draw(v3d);
904b8e80941Smrg
905b8e80941Smrg        return buffers;
906b8e80941Smrg}
907b8e80941Smrg
/**
 * Gallium clear() hook.  The TLB fast clear is tried first; whichever of the
 * requested buffers it does not take are cleared with a draw instead.
 */
static void
v3d_clear(struct pipe_context *pctx, unsigned buffers,
          const union pipe_color_union *color, double depth, unsigned stencil)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct v3d_job *job = v3d_get_job_for_fbo(v3d);

        /* Bits returned here were handled by the TLB clear. */
        unsigned fast_cleared = v3d_tlb_clear(job, buffers, color,
                                              depth, stencil);
        unsigned remaining = buffers & ~fast_cleared;

        if (!remaining)
                return;

        v3d_draw_clear(v3d, remaining, color, depth, stencil);
}
920b8e80941Smrg
/**
 * Gallium clear_render_target() hook.
 *
 * Not implemented: every argument is ignored and a notice is written to
 * stderr.
 */
static void
v3d_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
                        const union pipe_color_union *color,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fputs("unimpl: clear RT\n", stderr);
}
929b8e80941Smrg
/**
 * Gallium clear_depth_stencil() hook.
 *
 * Not implemented: every argument is ignored and a notice is written to
 * stderr.
 */
static void
v3d_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
                        unsigned buffers, double depth, unsigned stencil,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fputs("unimpl: clear DS\n", stderr);
}
938b8e80941Smrg
939b8e80941Smrgvoid
940b8e80941Smrgv3dX(draw_init)(struct pipe_context *pctx)
941b8e80941Smrg{
942b8e80941Smrg        pctx->draw_vbo = v3d_draw_vbo;
943b8e80941Smrg        pctx->clear = v3d_clear;
944b8e80941Smrg        pctx->clear_render_target = v3d_clear_render_target;
945b8e80941Smrg        pctx->clear_depth_stencil = v3d_clear_depth_stencil;
946b8e80941Smrg}
947