1848b8605Smrg/* 2848b8605Smrg * Copyright (c) 2014 Scott Mansell 3848b8605Smrg * Copyright © 2014 Broadcom 4848b8605Smrg * 5848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 6848b8605Smrg * copy of this software and associated documentation files (the "Software"), 7848b8605Smrg * to deal in the Software without restriction, including without limitation 8848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the 10848b8605Smrg * Software is furnished to do so, subject to the following conditions: 11848b8605Smrg * 12848b8605Smrg * The above copyright notice and this permission notice (including the next 13848b8605Smrg * paragraph) shall be included in all copies or substantial portions of the 14848b8605Smrg * Software. 15848b8605Smrg * 16848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17848b8605Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20848b8605Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21848b8605Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22848b8605Smrg * IN THE SOFTWARE. 23848b8605Smrg */ 24848b8605Smrg 25b8e80941Smrg#include "util/u_blitter.h" 26b8e80941Smrg#include "util/u_prim.h" 27848b8605Smrg#include "util/u_format.h" 28848b8605Smrg#include "util/u_pack_color.h" 29b8e80941Smrg#include "util/u_upload_mgr.h" 30848b8605Smrg#include "indices/u_primconvert.h" 31848b8605Smrg 32848b8605Smrg#include "vc4_context.h" 33848b8605Smrg#include "vc4_resource.h" 34848b8605Smrg 35b8e80941Smrg#define VC4_HW_2116_COUNT 0x1ef0 36b8e80941Smrg 37b8e80941Smrgstatic void 38b8e80941Smrgvc4_get_draw_cl_space(struct vc4_job *job, int vert_count) 39b8e80941Smrg{ 40b8e80941Smrg /* The SW-5891 workaround may cause us to emit multiple shader recs 41b8e80941Smrg * and draw packets. 42b8e80941Smrg */ 43b8e80941Smrg int num_draws = DIV_ROUND_UP(vert_count, 65535 - 2) + 1; 44b8e80941Smrg 45b8e80941Smrg /* Binner gets our packet state -- vc4_emit.c contents, 46b8e80941Smrg * and the primitive itself. 47b8e80941Smrg */ 48b8e80941Smrg cl_ensure_space(&job->bcl, 49b8e80941Smrg 256 + (VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE + 50b8e80941Smrg VC4_PACKET_GL_SHADER_STATE_SIZE) * num_draws); 51b8e80941Smrg 52b8e80941Smrg /* Nothing for rcl -- that's covered by vc4_context.c */ 53b8e80941Smrg 54b8e80941Smrg /* shader_rec gets up to 12 dwords of reloc handles plus a maximally 55b8e80941Smrg * sized shader_rec (104 bytes base for 8 vattrs plus 32 bytes of 56b8e80941Smrg * vattr stride). 57b8e80941Smrg */ 58b8e80941Smrg cl_ensure_space(&job->shader_rec, 59b8e80941Smrg (12 * sizeof(uint32_t) + 104 + 8 * 32) * num_draws); 60b8e80941Smrg 61b8e80941Smrg /* Uniforms are covered by vc4_write_uniforms(). */ 62b8e80941Smrg 63b8e80941Smrg /* There could be up to 16 textures per stage, plus misc other 64b8e80941Smrg * pointers. 65b8e80941Smrg */ 66b8e80941Smrg cl_ensure_space(&job->bo_handles, (2 * 16 + 20) * sizeof(uint32_t)); 67b8e80941Smrg cl_ensure_space(&job->bo_pointers, 68b8e80941Smrg (2 * 16 + 20) * sizeof(struct vc4_bo *)); 69b8e80941Smrg} 70b8e80941Smrg 71848b8605Smrg/** 72848b8605Smrg * Does the initial bining command list setup for drawing to a given FBO. 73848b8605Smrg */ 74848b8605Smrgstatic void 75848b8605Smrgvc4_start_draw(struct vc4_context *vc4) 76848b8605Smrg{ 77b8e80941Smrg struct vc4_job *job = vc4->job; 78b8e80941Smrg 79b8e80941Smrg if (job->needs_flush) 80848b8605Smrg return; 81848b8605Smrg 82b8e80941Smrg vc4_get_draw_cl_space(job, 0); 83b8e80941Smrg 84b8e80941Smrg cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION, bin) { 85b8e80941Smrg bin.width_in_tiles = job->draw_tiles_x; 86b8e80941Smrg bin.height_in_tiles = job->draw_tiles_y; 87b8e80941Smrg bin.multisample_mode_4x = job->msaa; 88b8e80941Smrg } 89848b8605Smrg 90b8e80941Smrg /* START_TILE_BINNING resets the statechange counters in the hardware, 91b8e80941Smrg * which are what is used when a primitive is binned to a tile to 92b8e80941Smrg * figure out what new state packets need to be written to that tile's 93b8e80941Smrg * command list. 94848b8605Smrg */ 95b8e80941Smrg cl_emit(&job->bcl, START_TILE_BINNING, start); 96b8e80941Smrg 97b8e80941Smrg /* Reset the current compressed primitives format. This gets modified 98b8e80941Smrg * by VC4_PACKET_GL_INDEXED_PRIMITIVE and 99b8e80941Smrg * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start 100b8e80941Smrg * of every tile. 101b8e80941Smrg */ 102b8e80941Smrg cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, list) { 103b8e80941Smrg list.data_type = _16_BIT_INDEX; 104b8e80941Smrg list.primitive_type = TRIANGLES_LIST; 105848b8605Smrg } 106b8e80941Smrg 107b8e80941Smrg job->needs_flush = true; 108b8e80941Smrg job->draw_width = vc4->framebuffer.width; 109b8e80941Smrg job->draw_height = vc4->framebuffer.height; 110b8e80941Smrg} 111b8e80941Smrg 112b8e80941Smrgstatic void 113b8e80941Smrgvc4_predraw_check_textures(struct pipe_context *pctx, 114b8e80941Smrg struct vc4_texture_stateobj *stage_tex) 115b8e80941Smrg{ 116b8e80941Smrg struct vc4_context *vc4 = vc4_context(pctx); 117b8e80941Smrg 118b8e80941Smrg for (int i = 0; i < stage_tex->num_textures; i++) { 119b8e80941Smrg struct vc4_sampler_view *view = 120b8e80941Smrg vc4_sampler_view(stage_tex->textures[i]); 121b8e80941Smrg if (!view) 122b8e80941Smrg continue; 123b8e80941Smrg 124b8e80941Smrg if (view->texture != view->base.texture) 125b8e80941Smrg vc4_update_shadow_baselevel_texture(pctx, &view->base); 126b8e80941Smrg 127b8e80941Smrg vc4_flush_jobs_writing_resource(vc4, view->texture); 128b8e80941Smrg } 129b8e80941Smrg} 130b8e80941Smrg 131b8e80941Smrgstatic void 132b8e80941Smrgvc4_emit_gl_shader_state(struct vc4_context *vc4, 133b8e80941Smrg const struct pipe_draw_info *info, 134b8e80941Smrg uint32_t extra_index_bias) 135b8e80941Smrg{ 136b8e80941Smrg struct vc4_job *job = vc4->job; 137b8e80941Smrg /* VC4_DIRTY_VTXSTATE */ 138b8e80941Smrg struct vc4_vertex_stateobj *vtx = vc4->vtx; 139b8e80941Smrg /* VC4_DIRTY_VTXBUF */ 140b8e80941Smrg struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf; 141b8e80941Smrg 142b8e80941Smrg /* The simulator throws a fit if VS or CS don't read an attribute, so 143b8e80941Smrg * we emit a dummy read. 144b8e80941Smrg */ 145b8e80941Smrg uint32_t num_elements_emit = MAX2(vtx->num_elements, 1); 146b8e80941Smrg 147b8e80941Smrg /* Emit the shader record. */ 148b8e80941Smrg cl_start_shader_reloc(&job->shader_rec, 3 + num_elements_emit); 149b8e80941Smrg 150b8e80941Smrg cl_emit(&job->shader_rec, SHADER_RECORD, rec) { 151b8e80941Smrg rec.enable_clipping = true; 152b8e80941Smrg 153b8e80941Smrg /* VC4_DIRTY_COMPILED_FS */ 154b8e80941Smrg rec.fragment_shader_is_single_threaded = 155b8e80941Smrg !vc4->prog.fs->fs_threaded; 156b8e80941Smrg 157b8e80941Smrg /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */ 158b8e80941Smrg rec.point_size_included_in_shaded_vertex_data = 159b8e80941Smrg (info->mode == PIPE_PRIM_POINTS && 160b8e80941Smrg vc4->rasterizer->base.point_size_per_vertex); 161b8e80941Smrg 162b8e80941Smrg /* VC4_DIRTY_COMPILED_FS */ 163b8e80941Smrg rec.fragment_shader_number_of_varyings = 164b8e80941Smrg vc4->prog.fs->num_inputs; 165b8e80941Smrg rec.fragment_shader_code_address = 166b8e80941Smrg cl_address(vc4->prog.fs->bo, 0); 167b8e80941Smrg 168b8e80941Smrg rec.coordinate_shader_attribute_array_select_bits = 169b8e80941Smrg vc4->prog.cs->vattrs_live; 170b8e80941Smrg rec.coordinate_shader_total_attributes_size = 171b8e80941Smrg vc4->prog.cs->vattr_offsets[8]; 172b8e80941Smrg rec.coordinate_shader_code_address = 173b8e80941Smrg cl_address(vc4->prog.cs->bo, 0); 174b8e80941Smrg 175b8e80941Smrg rec.vertex_shader_attribute_array_select_bits = 176b8e80941Smrg vc4->prog.vs->vattrs_live; 177b8e80941Smrg rec.vertex_shader_total_attributes_size = 178b8e80941Smrg vc4->prog.vs->vattr_offsets[8]; 179b8e80941Smrg rec.vertex_shader_code_address = 180b8e80941Smrg cl_address(vc4->prog.vs->bo, 0); 181b8e80941Smrg }; 182b8e80941Smrg 183b8e80941Smrg uint32_t max_index = 0xffff; 184b8e80941Smrg for (int i = 0; i < vtx->num_elements; i++) { 185b8e80941Smrg struct pipe_vertex_element *elem = &vtx->pipe[i]; 186b8e80941Smrg struct pipe_vertex_buffer *vb = 187b8e80941Smrg &vertexbuf->vb[elem->vertex_buffer_index]; 188b8e80941Smrg struct vc4_resource *rsc = vc4_resource(vb->buffer.resource); 189b8e80941Smrg /* not vc4->dirty tracked: vc4->last_index_bias */ 190b8e80941Smrg uint32_t offset = (vb->buffer_offset + 191b8e80941Smrg elem->src_offset + 192b8e80941Smrg vb->stride * (info->index_bias + 193b8e80941Smrg extra_index_bias)); 194b8e80941Smrg uint32_t vb_size = rsc->bo->size - offset; 195b8e80941Smrg uint32_t elem_size = 196b8e80941Smrg util_format_get_blocksize(elem->src_format); 197b8e80941Smrg 198b8e80941Smrg cl_emit(&job->shader_rec, ATTRIBUTE_RECORD, attr) { 199b8e80941Smrg attr.address = cl_address(rsc->bo, offset); 200b8e80941Smrg attr.number_of_bytes_minus_1 = elem_size - 1; 201b8e80941Smrg attr.stride = vb->stride; 202b8e80941Smrg attr.coordinate_shader_vpm_offset = 203b8e80941Smrg vc4->prog.cs->vattr_offsets[i]; 204b8e80941Smrg attr.vertex_shader_vpm_offset = 205b8e80941Smrg vc4->prog.vs->vattr_offsets[i]; 206b8e80941Smrg } 207b8e80941Smrg 208b8e80941Smrg if (vb->stride > 0) { 209b8e80941Smrg max_index = MIN2(max_index, 210b8e80941Smrg (vb_size - elem_size) / vb->stride); 211b8e80941Smrg } 212848b8605Smrg } 213848b8605Smrg 214b8e80941Smrg if (vtx->num_elements == 0) { 215b8e80941Smrg assert(num_elements_emit == 1); 216b8e80941Smrg struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO"); 217b8e80941Smrg 218b8e80941Smrg cl_emit(&job->shader_rec, ATTRIBUTE_RECORD, attr) { 219b8e80941Smrg attr.address = cl_address(bo, 0); 220b8e80941Smrg attr.number_of_bytes_minus_1 = 16 - 1; 221b8e80941Smrg attr.stride = 0; 222b8e80941Smrg attr.coordinate_shader_vpm_offset = 0; 223b8e80941Smrg attr.vertex_shader_vpm_offset = 0; 224b8e80941Smrg } 225b8e80941Smrg 226b8e80941Smrg vc4_bo_unreference(&bo); 227b8e80941Smrg } 228b8e80941Smrg 229b8e80941Smrg cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) { 230b8e80941Smrg /* Note that number of attributes == 0 in the packet means 8 231b8e80941Smrg * attributes. This field also contains the offset into 232b8e80941Smrg * shader_rec. 233b8e80941Smrg */ 234b8e80941Smrg assert(vtx->num_elements <= 8); 235b8e80941Smrg shader_state.number_of_attribute_arrays = 236b8e80941Smrg num_elements_emit & 0x7; 237b8e80941Smrg } 238b8e80941Smrg 239b8e80941Smrg vc4_write_uniforms(vc4, vc4->prog.fs, 240b8e80941Smrg &vc4->constbuf[PIPE_SHADER_FRAGMENT], 241b8e80941Smrg &vc4->fragtex); 242b8e80941Smrg vc4_write_uniforms(vc4, vc4->prog.vs, 243b8e80941Smrg &vc4->constbuf[PIPE_SHADER_VERTEX], 244b8e80941Smrg &vc4->verttex); 245b8e80941Smrg vc4_write_uniforms(vc4, vc4->prog.cs, 246b8e80941Smrg &vc4->constbuf[PIPE_SHADER_VERTEX], 247b8e80941Smrg &vc4->verttex); 248b8e80941Smrg 249b8e80941Smrg vc4->last_index_bias = info->index_bias + extra_index_bias; 250b8e80941Smrg vc4->max_index = max_index; 251b8e80941Smrg job->shader_rec_count++; 252b8e80941Smrg} 253b8e80941Smrg 254b8e80941Smrg/** 255b8e80941Smrg * HW-2116 workaround: Flush the batch before triggering the hardware state 256b8e80941Smrg * counter wraparound behavior. 257b8e80941Smrg * 258b8e80941Smrg * State updates are tracked by a global counter which increments at the first 259b8e80941Smrg * state update after a draw or a START_BINNING. Tiles can then have their 260b8e80941Smrg * state updated at draw time with a set of cheap checks for whether the 261b8e80941Smrg * state's copy of the global counter matches the global counter the last time 262b8e80941Smrg * that state was written to the tile. 263b8e80941Smrg * 264b8e80941Smrg * The state counters are relatively small and wrap around quickly, so you 265b8e80941Smrg * could get false negatives for needing to update a particular state in the 266b8e80941Smrg * tile. To avoid this, the hardware attempts to write all of the state in 267b8e80941Smrg * the tile at wraparound time. This apparently is broken, so we just flush 268b8e80941Smrg * everything before that behavior is triggered. A batch flush is sufficient 269b8e80941Smrg * to get our current contents drawn and reset the counters to 0. 270b8e80941Smrg * 271b8e80941Smrg * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the 272b8e80941Smrg * tiles with VC4_PACKET_RETURN_FROM_LIST. 273b8e80941Smrg */ 274b8e80941Smrgstatic void 275b8e80941Smrgvc4_hw_2116_workaround(struct pipe_context *pctx, int vert_count) 276b8e80941Smrg{ 277b8e80941Smrg struct vc4_context *vc4 = vc4_context(pctx); 278b8e80941Smrg struct vc4_job *job = vc4_get_job_for_fbo(vc4); 279b8e80941Smrg 280b8e80941Smrg if (job->draw_calls_queued + vert_count / 65535 >= VC4_HW_2116_COUNT) { 281b8e80941Smrg perf_debug("Flushing batch due to HW-2116 workaround " 282b8e80941Smrg "(too many draw calls per scene\n"); 283b8e80941Smrg vc4_job_submit(vc4, job); 284b8e80941Smrg } 285848b8605Smrg} 286848b8605Smrg 287848b8605Smrgstatic void 288848b8605Smrgvc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) 289848b8605Smrg{ 290848b8605Smrg struct vc4_context *vc4 = vc4_context(pctx); 291b8e80941Smrg struct pipe_draw_info local_info; 292b8e80941Smrg 293b8e80941Smrg if (!info->count_from_stream_output && !info->indirect && 294b8e80941Smrg !info->primitive_restart && 295b8e80941Smrg !u_trim_pipe_prim(info->mode, (unsigned*)&info->count)) 296b8e80941Smrg return; 297848b8605Smrg 298848b8605Smrg if (info->mode >= PIPE_PRIM_QUADS) { 299b8e80941Smrg if (info->mode == PIPE_PRIM_QUADS && 300b8e80941Smrg info->count == 4 && 301b8e80941Smrg !vc4->rasterizer->base.flatshade) { 302b8e80941Smrg local_info = *info; 303b8e80941Smrg local_info.mode = PIPE_PRIM_TRIANGLE_FAN; 304b8e80941Smrg info = &local_info; 305b8e80941Smrg } else { 306b8e80941Smrg util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base); 307b8e80941Smrg util_primconvert_draw_vbo(vc4->primconvert, info); 308b8e80941Smrg perf_debug("Fallback conversion for %d %s vertices\n", 309b8e80941Smrg info->count, u_prim_name(info->mode)); 310b8e80941Smrg return; 311b8e80941Smrg } 312b8e80941Smrg } 313b8e80941Smrg 314b8e80941Smrg /* Before setting up the draw, do any fixup blits necessary. */ 315b8e80941Smrg vc4_predraw_check_textures(pctx, &vc4->verttex); 316b8e80941Smrg vc4_predraw_check_textures(pctx, &vc4->fragtex); 317b8e80941Smrg 318b8e80941Smrg vc4_hw_2116_workaround(pctx, info->count); 319b8e80941Smrg 320b8e80941Smrg struct vc4_job *job = vc4_get_job_for_fbo(vc4); 321b8e80941Smrg 322b8e80941Smrg /* Make sure that the raster order flags haven't changed, which can 323b8e80941Smrg * only be set at job granularity. 324b8e80941Smrg */ 325b8e80941Smrg if (job->flags != vc4->rasterizer->tile_raster_order_flags) { 326b8e80941Smrg vc4_job_submit(vc4, job); 327b8e80941Smrg job = vc4_get_job_for_fbo(vc4); 328b8e80941Smrg } 329b8e80941Smrg 330b8e80941Smrg vc4_get_draw_cl_space(job, info->count); 331b8e80941Smrg 332b8e80941Smrg if (vc4->prim_mode != info->mode) { 333b8e80941Smrg vc4->prim_mode = info->mode; 334b8e80941Smrg vc4->dirty |= VC4_DIRTY_PRIM_MODE; 335848b8605Smrg } 336848b8605Smrg 337848b8605Smrg vc4_start_draw(vc4); 338b8e80941Smrg if (!vc4_update_compiled_shaders(vc4, info->mode)) { 339b8e80941Smrg debug_warn_once("shader compile failed, skipping draw call.\n"); 340b8e80941Smrg return; 341b8e80941Smrg } 342848b8605Smrg 343848b8605Smrg vc4_emit_state(pctx); 344848b8605Smrg 345b8e80941Smrg bool needs_drawarrays_shader_state = false; 346b8e80941Smrg 347b8e80941Smrg if ((vc4->dirty & (VC4_DIRTY_VTXBUF | 348b8e80941Smrg VC4_DIRTY_VTXSTATE | 349b8e80941Smrg VC4_DIRTY_PRIM_MODE | 350b8e80941Smrg VC4_DIRTY_RASTERIZER | 351b8e80941Smrg VC4_DIRTY_COMPILED_CS | 352b8e80941Smrg VC4_DIRTY_COMPILED_VS | 353b8e80941Smrg VC4_DIRTY_COMPILED_FS | 354b8e80941Smrg vc4->prog.cs->uniform_dirty_bits | 355b8e80941Smrg vc4->prog.vs->uniform_dirty_bits | 356b8e80941Smrg vc4->prog.fs->uniform_dirty_bits)) || 357b8e80941Smrg vc4->last_index_bias != info->index_bias) { 358b8e80941Smrg if (info->index_size) 359b8e80941Smrg vc4_emit_gl_shader_state(vc4, info, 0); 360b8e80941Smrg else 361b8e80941Smrg needs_drawarrays_shader_state = true; 362b8e80941Smrg } 363b8e80941Smrg 364b8e80941Smrg vc4->dirty = 0; 365848b8605Smrg 366848b8605Smrg /* Note that the primitive type fields match with OpenGL/gallium 367848b8605Smrg * definitions, up to but not including QUADS. 368848b8605Smrg */ 369b8e80941Smrg if (info->index_size) { 370b8e80941Smrg uint32_t index_size = info->index_size; 371b8e80941Smrg uint32_t offset = info->start * index_size; 372b8e80941Smrg struct pipe_resource *prsc; 373b8e80941Smrg if (info->index_size == 4) { 374b8e80941Smrg prsc = vc4_get_shadow_index_buffer(pctx, info, 375b8e80941Smrg offset, 376b8e80941Smrg info->count, &offset); 377b8e80941Smrg index_size = 2; 378b8e80941Smrg } else { 379b8e80941Smrg if (info->has_user_indices) { 380b8e80941Smrg prsc = NULL; 381b8e80941Smrg u_upload_data(vc4->uploader, 0, 382b8e80941Smrg info->count * index_size, 4, 383b8e80941Smrg info->index.user, 384b8e80941Smrg &offset, &prsc); 385b8e80941Smrg } else { 386b8e80941Smrg prsc = info->index.resource; 387b8e80941Smrg } 388b8e80941Smrg } 389b8e80941Smrg struct vc4_resource *rsc = vc4_resource(prsc); 390b8e80941Smrg 391b8e80941Smrg struct vc4_cl_out *bcl = cl_start(&job->bcl); 392b8e80941Smrg 393b8e80941Smrg /* The original design for the VC4 kernel UABI had multiple 394b8e80941Smrg * packets that used relocations in the BCL (some of which 395b8e80941Smrg * needed two BOs), but later modifications eliminated all but 396b8e80941Smrg * this one usage. We have an arbitrary 32-bit offset value, 397b8e80941Smrg * and need to also supply an arbitrary 32-bit index buffer 398b8e80941Smrg * GEM handle, so we have this fake packet we emit in our BCL 399b8e80941Smrg * to be validated, which the kernel uses at validation time 400b8e80941Smrg * to perform the relocation in the IB packet (without 401b8e80941Smrg * emitting to the actual HW). 402b8e80941Smrg */ 403b8e80941Smrg uint32_t hindex = vc4_gem_hindex(job, rsc->bo); 404b8e80941Smrg if (job->last_gem_handle_hindex != hindex) { 405b8e80941Smrg cl_u8(&bcl, VC4_PACKET_GEM_HANDLES); 406b8e80941Smrg cl_u32(&bcl, hindex); 407b8e80941Smrg cl_u32(&bcl, 0); 408b8e80941Smrg job->last_gem_handle_hindex = hindex; 409b8e80941Smrg } 410b8e80941Smrg 411b8e80941Smrg cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE); 412b8e80941Smrg cl_u8(&bcl, 413848b8605Smrg info->mode | 414b8e80941Smrg (index_size == 2 ? 415848b8605Smrg VC4_INDEX_BUFFER_U16: 416848b8605Smrg VC4_INDEX_BUFFER_U8)); 417b8e80941Smrg cl_u32(&bcl, info->count); 418b8e80941Smrg cl_u32(&bcl, offset); 419b8e80941Smrg cl_u32(&bcl, vc4->max_index); 420b8e80941Smrg 421b8e80941Smrg cl_end(&job->bcl, bcl); 422b8e80941Smrg job->draw_calls_queued++; 423b8e80941Smrg 424b8e80941Smrg if (info->index_size == 4 || info->has_user_indices) 425b8e80941Smrg pipe_resource_reference(&prsc, NULL); 426848b8605Smrg } else { 427b8e80941Smrg uint32_t count = info->count; 428b8e80941Smrg uint32_t start = info->start; 429b8e80941Smrg uint32_t extra_index_bias = 0; 430b8e80941Smrg static const uint32_t max_verts = 65535; 431b8e80941Smrg 432b8e80941Smrg /* GFXH-515 / SW-5891: The binner emits 16 bit indices for 433b8e80941Smrg * drawarrays, which means that if start + count > 64k it 434b8e80941Smrg * would truncate the top bits. Work around this by emitting 435b8e80941Smrg * a limited number of primitives at a time and reemitting the 436b8e80941Smrg * shader state pointing farther down the vertex attribute 437b8e80941Smrg * arrays. 438b8e80941Smrg * 439b8e80941Smrg * To do this properly for line loops or trifans, we'd need to 440b8e80941Smrg * make a new VB containing the first vertex plus whatever 441b8e80941Smrg * remainder. 442b8e80941Smrg */ 443b8e80941Smrg if (start + count > max_verts) { 444b8e80941Smrg extra_index_bias = start; 445b8e80941Smrg start = 0; 446b8e80941Smrg needs_drawarrays_shader_state = true; 447b8e80941Smrg } 448b8e80941Smrg 449b8e80941Smrg while (count) { 450b8e80941Smrg uint32_t this_count = count; 451b8e80941Smrg uint32_t step = count; 452b8e80941Smrg 453b8e80941Smrg if (needs_drawarrays_shader_state) { 454b8e80941Smrg vc4_emit_gl_shader_state(vc4, info, 455b8e80941Smrg extra_index_bias); 456b8e80941Smrg } 457b8e80941Smrg 458b8e80941Smrg if (count > max_verts) { 459b8e80941Smrg switch (info->mode) { 460b8e80941Smrg case PIPE_PRIM_POINTS: 461b8e80941Smrg this_count = step = max_verts; 462b8e80941Smrg break; 463b8e80941Smrg case PIPE_PRIM_LINES: 464b8e80941Smrg this_count = step = max_verts - (max_verts % 2); 465b8e80941Smrg break; 466b8e80941Smrg case PIPE_PRIM_LINE_STRIP: 467b8e80941Smrg this_count = max_verts; 468b8e80941Smrg step = max_verts - 1; 469b8e80941Smrg break; 470b8e80941Smrg case PIPE_PRIM_LINE_LOOP: 471b8e80941Smrg this_count = max_verts; 472b8e80941Smrg step = max_verts - 1; 473b8e80941Smrg debug_warn_once("unhandled line loop " 474b8e80941Smrg "looping behavior with " 475b8e80941Smrg ">65535 verts\n"); 476b8e80941Smrg break; 477b8e80941Smrg case PIPE_PRIM_TRIANGLES: 478b8e80941Smrg this_count = step = max_verts - (max_verts % 3); 479b8e80941Smrg break; 480b8e80941Smrg case PIPE_PRIM_TRIANGLE_STRIP: 481b8e80941Smrg this_count = max_verts; 482b8e80941Smrg step = max_verts - 2; 483b8e80941Smrg break; 484b8e80941Smrg default: 485b8e80941Smrg debug_warn_once("unhandled primitive " 486b8e80941Smrg "max vert count, truncating\n"); 487b8e80941Smrg this_count = step = max_verts; 488b8e80941Smrg } 489b8e80941Smrg } 490b8e80941Smrg 491b8e80941Smrg cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, array) { 492b8e80941Smrg array.primitive_mode = info->mode; 493b8e80941Smrg array.length = this_count; 494b8e80941Smrg array.index_of_first_vertex = start; 495b8e80941Smrg } 496b8e80941Smrg job->draw_calls_queued++; 497b8e80941Smrg 498b8e80941Smrg count -= step; 499b8e80941Smrg extra_index_bias += start + step; 500b8e80941Smrg start = 0; 501b8e80941Smrg needs_drawarrays_shader_state = true; 502b8e80941Smrg } 503848b8605Smrg } 504848b8605Smrg 505b8e80941Smrg /* We shouldn't have tripped the HW_2116 bug with the GFXH-515 506b8e80941Smrg * workaround. 507b8e80941Smrg */ 508b8e80941Smrg assert(job->draw_calls_queued <= VC4_HW_2116_COUNT); 509848b8605Smrg 510b8e80941Smrg if (vc4->zsa && vc4->framebuffer.zsbuf) { 511b8e80941Smrg struct vc4_resource *rsc = 512b8e80941Smrg vc4_resource(vc4->framebuffer.zsbuf->texture); 513848b8605Smrg 514b8e80941Smrg if (vc4->zsa->base.depth.enabled) { 515b8e80941Smrg job->resolve |= PIPE_CLEAR_DEPTH; 516b8e80941Smrg rsc->initialized_buffers = PIPE_CLEAR_DEPTH; 517b8e80941Smrg } 518848b8605Smrg 519b8e80941Smrg if (vc4->zsa->base.stencil[0].enabled) { 520b8e80941Smrg job->resolve |= PIPE_CLEAR_STENCIL; 521b8e80941Smrg rsc->initialized_buffers |= PIPE_CLEAR_STENCIL; 522b8e80941Smrg } 523848b8605Smrg } 524848b8605Smrg 525b8e80941Smrg job->resolve |= PIPE_CLEAR_COLOR0; 526b8e80941Smrg 527b8e80941Smrg /* If we've used half of the presumably 256MB CMA area, flush the job 528b8e80941Smrg * so that we don't accumulate a job that will end up not being 529b8e80941Smrg * executable. 530b8e80941Smrg */ 531b8e80941Smrg if (job->bo_space > 128 * 1024 * 1024) 532b8e80941Smrg vc4_flush(pctx); 533b8e80941Smrg 534b8e80941Smrg if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH) 535b8e80941Smrg vc4_flush(pctx); 536848b8605Smrg} 537848b8605Smrg 538848b8605Smrgstatic uint32_t 539848b8605Smrgpack_rgba(enum pipe_format format, const float *rgba) 540848b8605Smrg{ 541848b8605Smrg union util_color uc; 542848b8605Smrg util_pack_color(rgba, format, &uc); 543b8e80941Smrg if (util_format_get_blocksize(format) == 2) 544b8e80941Smrg return uc.us; 545b8e80941Smrg else 546b8e80941Smrg return uc.ui[0]; 547848b8605Smrg} 548848b8605Smrg 549848b8605Smrgstatic void 550848b8605Smrgvc4_clear(struct pipe_context *pctx, unsigned buffers, 551848b8605Smrg const union pipe_color_union *color, double depth, unsigned stencil) 552848b8605Smrg{ 553848b8605Smrg struct vc4_context *vc4 = vc4_context(pctx); 554b8e80941Smrg struct vc4_job *job = vc4_get_job_for_fbo(vc4); 555b8e80941Smrg 556b8e80941Smrg if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { 557b8e80941Smrg struct vc4_resource *rsc = 558b8e80941Smrg vc4_resource(vc4->framebuffer.zsbuf->texture); 559b8e80941Smrg unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL; 560b8e80941Smrg 561b8e80941Smrg /* Clearing ZS will clear both Z and stencil, so if we're 562b8e80941Smrg * trying to clear just one then we need to draw a quad to do 563b8e80941Smrg * it instead. We need to do this before setting up 564b8e80941Smrg * tile-based clears in vc4->job, because the blitter may 565b8e80941Smrg * submit the current job. 566b8e80941Smrg */ 567b8e80941Smrg if ((zsclear == PIPE_CLEAR_DEPTH || 568b8e80941Smrg zsclear == PIPE_CLEAR_STENCIL) && 569b8e80941Smrg (rsc->initialized_buffers & ~(zsclear | job->cleared)) && 570b8e80941Smrg util_format_is_depth_and_stencil(vc4->framebuffer.zsbuf->format)) { 571b8e80941Smrg static const union pipe_color_union dummy_color = {}; 572b8e80941Smrg 573b8e80941Smrg perf_debug("Partial clear of Z+stencil buffer, " 574b8e80941Smrg "drawing a quad instead of fast clearing\n"); 575b8e80941Smrg vc4_blitter_save(vc4); 576b8e80941Smrg util_blitter_clear(vc4->blitter, 577b8e80941Smrg vc4->framebuffer.width, 578b8e80941Smrg vc4->framebuffer.height, 579b8e80941Smrg 1, 580b8e80941Smrg zsclear, 581b8e80941Smrg &dummy_color, depth, stencil); 582b8e80941Smrg buffers &= ~zsclear; 583b8e80941Smrg if (!buffers) 584b8e80941Smrg return; 585b8e80941Smrg job = vc4_get_job_for_fbo(vc4); 586b8e80941Smrg } 587b8e80941Smrg } 588848b8605Smrg 589848b8605Smrg /* We can't flag new buffers for clearing once we've queued draws. We 590848b8605Smrg * could avoid this by using the 3d engine to clear. 591848b8605Smrg */ 592b8e80941Smrg if (job->draw_calls_queued) { 593b8e80941Smrg perf_debug("Flushing rendering to process new clear.\n"); 594b8e80941Smrg vc4_job_submit(vc4, job); 595b8e80941Smrg job = vc4_get_job_for_fbo(vc4); 596b8e80941Smrg } 597848b8605Smrg 598848b8605Smrg if (buffers & PIPE_CLEAR_COLOR0) { 599b8e80941Smrg struct vc4_resource *rsc = 600b8e80941Smrg vc4_resource(vc4->framebuffer.cbufs[0]->texture); 601b8e80941Smrg uint32_t clear_color; 602b8e80941Smrg 603b8e80941Smrg if (vc4_rt_format_is_565(vc4->framebuffer.cbufs[0]->format)) { 604b8e80941Smrg /* In 565 mode, the hardware will be packing our color 605b8e80941Smrg * for us. 606b8e80941Smrg */ 607b8e80941Smrg clear_color = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, 608b8e80941Smrg color->f); 609b8e80941Smrg } else { 610b8e80941Smrg /* Otherwise, we need to do this packing because we 611b8e80941Smrg * support multiple swizzlings of RGBA8888. 612b8e80941Smrg */ 613b8e80941Smrg clear_color = 614b8e80941Smrg pack_rgba(vc4->framebuffer.cbufs[0]->format, 615b8e80941Smrg color->f); 616b8e80941Smrg } 617b8e80941Smrg job->clear_color[0] = job->clear_color[1] = clear_color; 618b8e80941Smrg rsc->initialized_buffers |= (buffers & PIPE_CLEAR_COLOR0); 619848b8605Smrg } 620848b8605Smrg 621b8e80941Smrg if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { 622b8e80941Smrg struct vc4_resource *rsc = 623b8e80941Smrg vc4_resource(vc4->framebuffer.zsbuf->texture); 624b8e80941Smrg 625b8e80941Smrg /* Though the depth buffer is stored with Z in the high 24, 626b8e80941Smrg * for this field we just need to store it in the low 24. 627b8e80941Smrg */ 628b8e80941Smrg if (buffers & PIPE_CLEAR_DEPTH) { 629b8e80941Smrg job->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, 630b8e80941Smrg depth); 631b8e80941Smrg } 632b8e80941Smrg if (buffers & PIPE_CLEAR_STENCIL) 633b8e80941Smrg job->clear_stencil = stencil; 634b8e80941Smrg 635b8e80941Smrg rsc->initialized_buffers |= (buffers & PIPE_CLEAR_DEPTHSTENCIL); 636b8e80941Smrg } 637848b8605Smrg 638b8e80941Smrg job->draw_min_x = 0; 639b8e80941Smrg job->draw_min_y = 0; 640b8e80941Smrg job->draw_max_x = vc4->framebuffer.width; 641b8e80941Smrg job->draw_max_y = vc4->framebuffer.height; 642b8e80941Smrg job->cleared |= buffers; 643b8e80941Smrg job->resolve |= buffers; 644848b8605Smrg 645848b8605Smrg vc4_start_draw(vc4); 646848b8605Smrg} 647848b8605Smrg 648848b8605Smrgstatic void 649848b8605Smrgvc4_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps, 650848b8605Smrg const union pipe_color_union *color, 651b8e80941Smrg unsigned x, unsigned y, unsigned w, unsigned h, 652b8e80941Smrg bool render_condition_enabled) 653848b8605Smrg{ 654848b8605Smrg fprintf(stderr, "unimpl: clear RT\n"); 655848b8605Smrg} 656848b8605Smrg 657848b8605Smrgstatic void 658848b8605Smrgvc4_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, 659848b8605Smrg unsigned buffers, double depth, unsigned stencil, 660b8e80941Smrg unsigned x, unsigned y, unsigned w, unsigned h, 661b8e80941Smrg bool render_condition_enabled) 662848b8605Smrg{ 663848b8605Smrg fprintf(stderr, "unimpl: clear DS\n"); 664848b8605Smrg} 665848b8605Smrg 666848b8605Smrgvoid 667848b8605Smrgvc4_draw_init(struct pipe_context *pctx) 668848b8605Smrg{ 669848b8605Smrg pctx->draw_vbo = vc4_draw_vbo; 670848b8605Smrg pctx->clear = vc4_clear; 671848b8605Smrg pctx->clear_render_target = vc4_clear_render_target; 672848b8605Smrg pctx->clear_depth_stencil = vc4_clear_depth_stencil; 673848b8605Smrg} 674